
Commit 6d32e2f

[7.15] Ensure Node Shutdown doesn't stall when all nodes in the cluster have a copy of a shard (#78578) (#78719)
* Ensure Node Shutdown doesn't stall when all nodes in the cluster have a copy of a shard (#78578)
* Fix compilation for 7.x branches
* Fix compilation for 7.15.1 specifically. This commit removes a param from a method call that is not present in 7.15.1, only 7.16.0 and up.
1 parent ab01ffc commit 6d32e2f
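
In essence, the fix changes what counts as "stalled": a shard that cannot relocate off the shutting-down node is harmless if a STARTED copy of it already lives on a node that is not shutting down. A minimal standalone sketch of that predicate, using a hypothetical simplified ShardCopy type rather than the real ShardRouting API:

    import java.util.List;
    import java.util.Set;

    class ShardSafetyCheck {
        // Hypothetical simplified stand-in for ShardRouting.
        record ShardCopy(int shardId, String nodeId, boolean started) {}

        // A shard stuck on a shutting-down node should not flip the shutdown
        // status to STALLED if at least one STARTED copy of the same shard
        // lives on a node that is not itself shutting down: the data is
        // already safe elsewhere, so the shutdown can complete.
        static boolean hasSafeCopyElsewhere(ShardCopy stuck, List<ShardCopy> allCopies, Set<String> shuttingDownNodes) {
            return allCopies.stream()
                .filter(copy -> copy.shardId() == stuck.shardId())
                .filter(ShardCopy::started)
                .anyMatch(copy -> shuttingDownNodes.contains(copy.nodeId()) == false);
        }
    }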

File tree

3 files changed: +147, -5 lines changed

x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownShardsIT.java

Lines changed: 73 additions & 0 deletions
@@ -7,19 +7,27 @@
 
 package org.elasticsearch.xpack.shutdown;
 
+import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.elasticsearch.Build;
 import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
 import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
+import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.support.master.AcknowledgedResponse;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata;
 import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.cluster.routing.ShardRoutingState;
+import org.elasticsearch.cluster.routing.UnassignedInfo;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.InternalTestCluster;
 
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.List;
 
 import static org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Status.COMPLETE;
 import static org.hamcrest.Matchers.equalTo;
@@ -134,6 +142,71 @@ public void testShardStatusIsCompleteOnNonDataNodes() throws Exception {
         assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
     }
 
+    /**
+     * Checks that, if we get to a situation where a shard can't move because all other nodes already have a copy of that shard,
+     * we'll still return COMPLETE instead of STALLED.
+     */
+    public void testNotStalledIfAllShardsHaveACopyOnAnotherNode() throws Exception {
+        internalCluster().startNodes(2);
+
+        final String indexName = "test";
+        prepareCreate(indexName).setSettings(
+            Settings.builder()
+                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) // <- Ensure we have a copy of the shard on both nodes
+                .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), 0) // Disable "normal" delayed allocation
+        ).get();
+        ensureGreen(indexName);
+        indexRandomData();
+
+        String nodeToStopId = findIdOfNodeWithPrimaryShard(indexName);
+        PutShutdownNodeAction.Request putShutdownRequest = new PutShutdownNodeAction.Request(
+            nodeToStopId,
+            SingleNodeShutdownMetadata.Type.REMOVE,
+            this.getTestName(),
+            null
+        );
+        AcknowledgedResponse putShutdownResponse = client().execute(PutShutdownNodeAction.INSTANCE, putShutdownRequest).get();
+        assertTrue(putShutdownResponse.isAcknowledged());
+
+        assertBusy(() -> {
+            GetShutdownStatusAction.Response getResp = client().execute(
+                GetShutdownStatusAction.INSTANCE,
+                new GetShutdownStatusAction.Request(nodeToStopId)
+            ).get();
+
+            assertThat(getResp.getShutdownStatuses().get(0).migrationStatus().getStatus(), equalTo(COMPLETE));
+        });
+    }
+
+    private void indexRandomData() throws Exception {
+        int numDocs = scaledRandomIntBetween(100, 1000);
+        IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
+        for (int i = 0; i < builders.length; i++) {
+            builders[i] = client().prepareIndex("test", "_doc").setSource("field", "value");
+        }
+        indexRandom(true, builders);
+    }
+
+    private String findIdOfNodeWithPrimaryShard(String indexName) {
+        ClusterState state = client().admin().cluster().prepareState().get().getState();
+        List<ShardRouting> startedShards = state.routingTable().shardsWithState(ShardRoutingState.STARTED);
+        return startedShards.stream()
+            .filter(ShardRouting::primary)
+            .filter(shardRouting -> indexName.equals(shardRouting.index().getName()))
+            .map(ShardRouting::currentNodeId)
+            .findFirst()
+            .orElseThrow(
+                () -> new AssertionError(
+                    new ParameterizedMessage(
+                        "could not find a primary shard of index [{}] in list of started shards [{}]",
+                        indexName,
+                        startedShards
+                    )
+                )
+            );
+    }
+
     private String getNodeId(String nodeName) throws Exception {
         NodesInfoResponse nodes = client().admin().cluster().prepareNodesInfo().clear().get();
         return nodes.getNodes()
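
The integration test above engineers the edge case directly: with one primary and one replica on a two-node cluster, the shard on the stopping node has nowhere to go, since allocation rules forbid placing two copies of the same shard on one node and the only other node already holds the replica. The assertBusy loop then verifies that the shutdown status converges to COMPLETE rather than STALLED.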

x-pack/plugin/shutdown/src/main/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusAction.java

Lines changed: 34 additions & 5 deletions
@@ -28,6 +28,7 @@
 import org.elasticsearch.cluster.routing.allocation.AllocationDecision;
 import org.elasticsearch.cluster.routing.allocation.AllocationService;
 import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
+import org.elasticsearch.cluster.routing.allocation.ShardAllocationDecision;
 import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.Strings;
@@ -44,6 +45,8 @@
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
 
 public class TransportGetShutdownStatusAction extends TransportMasterNodeAction<
@@ -220,8 +223,13 @@ static ShutdownShardMigrationStatus shardMigrationStatus(
         );
         allocation.setDebugMode(RoutingAllocation.DebugMode.EXCLUDE_YES_DECISIONS);
 
+        // We also need the set of node IDs which are currently shutting down.
+        Set<String> shuttingDownNodes = currentState.metadata().nodeShutdowns().keySet();
+
+        AtomicInteger shardsToIgnoreForFinalStatus = new AtomicInteger(0);
+
         // Explain shard allocations until we find one that can't move, then stop (as `findFirst` short-circuits)
-        final Optional<ShardRouting> unmovableShard = currentState.getRoutingNodes()
+        Optional<Tuple<ShardRouting, ShardAllocationDecision>> unmovableShard = currentState.getRoutingNodes()
             .node(nodeId)
             .shardsWithState(ShardRoutingState.STARTED)
             .stream()
@@ -236,6 +244,21 @@ static ShutdownShardMigrationStatus shardMigrationStatus(
             .filter(pair -> pair.v2().getMoveDecision().getAllocationDecision().equals(AllocationDecision.THROTTLED) == false)
             // These shards will move as soon as possible
             .filter(pair -> pair.v2().getMoveDecision().getAllocationDecision().equals(AllocationDecision.YES) == false)
+            // If the shard that can't move is on every node in the cluster, we shouldn't be `STALLED` on it.
+            .filter(pair -> {
+                final boolean hasShardCopyOnOtherNode = currentState.routingTable()
+                    .allShards(pair.v1().index().getName())
+                    .stream()
+                    .filter(shardRouting -> shardRouting.id() == pair.v1().id())
+                    // If any shards are both 1) `STARTED` and 2) are not on a node that's shutting down, we have at least one copy
+                    // of this shard safely on a node that's not shutting down, so we don't want to report `STALLED` because of this shard.
+                    .filter(ShardRouting::started)
+                    .anyMatch(routing -> shuttingDownNodes.contains(routing.currentNodeId()) == false);
+                if (hasShardCopyOnOtherNode) {
+                    shardsToIgnoreForFinalStatus.incrementAndGet();
+                }
+                return hasShardCopyOnOtherNode == false;
+            })
             .peek(pair -> {
                 if (logger.isTraceEnabled()) { // don't serialize the decision unless we have to
                     logger.trace(
@@ -249,12 +272,19 @@ static ShutdownShardMigrationStatus shardMigrationStatus(
                     );
                 }
             })
-            .map(Tuple::v1)
             .findFirst();
 
-        if (unmovableShard.isPresent()) {
+        if (totalRemainingShards == shardsToIgnoreForFinalStatus.get() && unmovableShard.isPresent() == false) {
+            return new ShutdownShardMigrationStatus(
+                SingleNodeShutdownMetadata.Status.COMPLETE,
+                0,
+                "["
+                    + shardsToIgnoreForFinalStatus.get()
+                    + "] shards cannot be moved away from this node but have at least one copy on another node in the cluster"
+            );
+        } else if (unmovableShard.isPresent()) {
             // We found a shard that can't be moved, so shard relocation is stalled. Blame the unmovable shard.
-            ShardRouting shardRouting = unmovableShard.get();
+            ShardRouting shardRouting = unmovableShard.get().v1();
 
             return new ShutdownShardMigrationStatus(
                 SingleNodeShutdownMetadata.Status.STALLED,
@@ -267,7 +297,6 @@ static ShutdownShardMigrationStatus shardMigrationStatus(
                 ).getFormattedMessage()
             );
         } else {
-            // We couldn't find any shards that can't be moved, so we're just waiting for other recoveries or initializing shards
             return new ShutdownShardMigrationStatus(SingleNodeShutdownMetadata.Status.IN_PROGRESS, totalRemainingShards);
         }
     }
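
The final status is now derived in three steps: COMPLETE when every remaining shard was ignored because a safe copy exists elsewhere and no genuinely unmovable shard was found; STALLED when an unmovable shard remains; otherwise IN_PROGRESS while relocations continue. A condensed sketch of that branch, with a simplified, hypothetical signature (the real method above works from the cluster state and returns a ShutdownShardMigrationStatus):

    class ShutdownStatusDecision {
        enum Status { COMPLETE, STALLED, IN_PROGRESS }

        // Sketch only: the parameters mirror the counters in shardMigrationStatus.
        static Status decide(int totalRemainingShards, int shardsIgnoredForFinalStatus, boolean unmovableShardFound) {
            if (totalRemainingShards == shardsIgnoredForFinalStatus && unmovableShardFound == false) {
                // Every shard still on the node has a STARTED copy on a node that isn't shutting down.
                return Status.COMPLETE;
            } else if (unmovableShardFound) {
                // A shard genuinely has nowhere to go; report STALLED and blame it.
                return Status.STALLED;
            } else {
                // Shards can still move; relocations simply haven't finished yet.
                return Status.IN_PROGRESS;
            }
        }
    }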

x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusActionTests.java

Lines changed: 40 additions & 0 deletions
@@ -365,6 +365,46 @@ public void testStalled() {
         );
     }
 
+    public void testNotStalledIfAllShardsHaveACopyOnAnotherNode() {
+        Index index = new Index(randomAlphaOfLength(5), randomAlphaOfLengthBetween(1, 20));
+        IndexMetadata imd = generateIndexMetadata(index, 3, 0);
+        IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(index)
+            .addShard(TestShardRouting.newShardRouting(new ShardId(index, 0), LIVE_NODE_ID, false, ShardRoutingState.STARTED))
+            .addShard(TestShardRouting.newShardRouting(new ShardId(index, 0), SHUTTING_DOWN_NODE_ID, true, ShardRoutingState.STARTED))
+            .build();
+
+        // Force a decision of NO for all moves and new allocations, simulating a decider that's stuck
+        canAllocate.set((r, n, a) -> Decision.NO);
+        // And the remain decider simulates NodeShutdownAllocationDecider
+        canRemain.set((r, n, a) -> n.nodeId().equals(SHUTTING_DOWN_NODE_ID) ? Decision.NO : Decision.YES);
+
+        RoutingTable.Builder routingTable = RoutingTable.builder();
+        routingTable.add(indexRoutingTable);
+        ClusterState state = createTestClusterState(
+            routingTable.build(),
+            org.elasticsearch.core.List.of(imd),
+            SingleNodeShutdownMetadata.Type.REMOVE
+        );
+
+        ShutdownShardMigrationStatus status = TransportGetShutdownStatusAction.shardMigrationStatus(
+            state,
+            SHUTTING_DOWN_NODE_ID,
+            SingleNodeShutdownMetadata.Type.REMOVE,
+            true,
+            clusterInfoService,
+            snapshotsInfoService,
+            allocationService,
+            allocationDeciders
+        );
+
+        assertShardMigration(
+            status,
+            SingleNodeShutdownMetadata.Status.COMPLETE,
+            0,
+            containsString("[1] shards cannot be moved away from this node but have at least one copy on another node in the cluster")
+        );
+    }
+
     public void testOnlyInitializingShardsRemaining() {
         Index index = new Index(randomAlphaOfLength(5), randomAlphaOfLengthBetween(1, 20));
         IndexMetadata imd = generateIndexMetadata(index, 3, 0);
