Skip to content

Commit 7ce9397

Browse files
HCD-73: Fix replacing node stuck in hibernation state (#1626)
### What is the issue

When a node is being replaced, the replacing node announces itself as hibernated (one of the silent-shutdown states). If the replacement fails, the other nodes still see the replacing node in that state, so at its next startup it does not receive any gossip messages from the seeds, and the startup ends with an exception.

### What does this PR fix and why was it fixed

This patch adds an explicit shutdown announcement via gossip, sent before the exception is thrown, to let the other nodes know that the node has shut down. That allows the other nodes (seeds in particular) to contact the replacing node at its next startup, so it can retry the replacement.
1 parent 6de8ee1 commit 7ce9397

File tree

4 files changed

+126
-8
lines changed

4 files changed

+126
-8
lines changed

src/java/org/apache/cassandra/gms/Gossiper.java

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1588,11 +1588,13 @@ private void silentlyMarkDead(InetAddressAndPort addr, EndpointState localState)
15881588

15891589
/**
15901590
* This method is called whenever there is a "big" change in ep state (a generation change for a known node).
1591+
* It is public only because tests need to simulate a state change; otherwise it should not be called directly.
15911592
*
15921593
* @param ep endpoint
15931594
* @param epState EndpointState for the endpoint
15941595
*/
1595-
private void handleMajorStateChange(InetAddressAndPort ep, EndpointState epState)
1596+
@VisibleForTesting
1597+
public void handleMajorStateChange(InetAddressAndPort ep, EndpointState epState)
15961598
{
15971599
checkProperThreadForStateMutation();
15981600
EndpointState localEpState = endpointStateMap.get(ep);
@@ -2265,20 +2267,29 @@ public void stop()
22652267
EndpointState mystate = endpointStateMap.get(FBUtilities.getBroadcastAddressAndPort());
22662268
if (mystate != null && !isSilentShutdownState(mystate) && StorageService.instance.isJoined())
22672269
{
2268-
logger.info("Announcing shutdown");
2269-
addLocalApplicationState(ApplicationState.STATUS_WITH_PORT, StorageService.instance.valueFactory.shutdown(true));
2270-
addLocalApplicationState(ApplicationState.STATUS, StorageService.instance.valueFactory.shutdown(true));
2271-
Message message = Message.out(Verb.GOSSIP_SHUTDOWN, noPayload);
2272-
for (InetAddressAndPort ep : liveEndpoints)
2273-
MessagingService.instance().send(message, ep);
2274-
Uninterruptibles.sleepUninterruptibly(Integer.getInteger("cassandra.shutdown_announce_in_ms", 2000), TimeUnit.MILLISECONDS);
2270+
announceShutdown();
22752271
}
22762272
else
22772273
logger.warn("No local state, state is in silent shutdown, or node hasn't joined, not announcing shutdown");
22782274
if (scheduledGossipTask != null)
22792275
scheduledGossipTask.cancel(false);
22802276
}
22812277

2278+
/**
2279+
* This method sends the node shutdown status to all live endpoints.
2280+
* It does not close the gossiper itself.
2281+
*/
2282+
public void announceShutdown()
2283+
{
2284+
logger.info("Announcing shutdown");
2285+
addLocalApplicationState(ApplicationState.STATUS_WITH_PORT, StorageService.instance.valueFactory.shutdown(true));
2286+
addLocalApplicationState(ApplicationState.STATUS, StorageService.instance.valueFactory.shutdown(true));
2287+
Message message = Message.out(Verb.GOSSIP_SHUTDOWN, noPayload);
2288+
for (InetAddressAndPort ep : liveEndpoints)
2289+
MessagingService.instance().send(message, ep);
2290+
Uninterruptibles.sleepUninterruptibly(Integer.getInteger("cassandra.shutdown_announce_in_ms", 2000), TimeUnit.MILLISECONDS);
2291+
}
2292+
22822293
public boolean isEnabled()
22832294
{
22842295
ScheduledFuture<?> scheduledGossipTask = this.scheduledGossipTask;

src/java/org/apache/cassandra/service/StorageService.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1916,7 +1916,11 @@ public boolean bootstrap(final Collection<Token> tokens, long bootstrapTimeoutMi
19161916
Nodes.peers().remove(DatabaseDescriptor.getReplaceAddress());
19171917
}
19181918
if (!Gossiper.instance.seenAnySeed())
1919+
{
1920+
logger.info("Announcing shutdown to get out of the hibernation deadlock");
1921+
Gossiper.instance.announceShutdown();
19191922
throw new IllegalStateException("Unable to contact any seeds: " + Gossiper.instance.getSeeds());
1923+
}
19201924

19211925
if (Boolean.getBoolean("cassandra.reset_bootstrap_progress"))
19221926
{

test/distributed/org/apache/cassandra/distributed/impl/Instance.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,7 @@ public void startup(ICluster cluster)
651651
}
652652
catch (Throwable t)
653653
{
654+
startedAt.set(0);
654655
if (t instanceof RuntimeException)
655656
throw (RuntimeException) t;
656657
throw new RuntimeException(t);

test/distributed/org/apache/cassandra/distributed/test/hostreplacement/HostReplacementTest.java

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@
1919
package org.apache.cassandra.distributed.test.hostreplacement;
2020

2121
import java.io.IOException;
22+
import java.net.UnknownHostException;
2223
import java.util.Arrays;
2324
import java.util.List;
25+
import java.util.UUID;
2426

2527
import org.junit.Test;
2628
import org.slf4j.Logger;
@@ -32,10 +34,21 @@
3234
import org.apache.cassandra.distributed.api.Feature;
3335
import org.apache.cassandra.distributed.api.ICoordinator;
3436
import org.apache.cassandra.distributed.api.IInvokableInstance;
37+
import org.apache.cassandra.distributed.api.IIsolatedExecutor;
3538
import org.apache.cassandra.distributed.api.SimpleQueryResult;
3639
import org.apache.cassandra.distributed.api.TokenSupplier;
40+
import org.apache.cassandra.distributed.impl.InstanceConfig;
3741
import org.apache.cassandra.distributed.shared.AssertUtils;
42+
import org.apache.cassandra.distributed.shared.ClusterUtils;
43+
import org.apache.cassandra.distributed.shared.WithProperties;
3844
import org.apache.cassandra.distributed.test.TestBaseImpl;
45+
import org.apache.cassandra.gms.ApplicationState;
46+
import org.apache.cassandra.gms.EndpointState;
47+
import org.apache.cassandra.gms.Gossiper;
48+
import org.apache.cassandra.gms.VersionedValue;
49+
import org.apache.cassandra.io.util.FileUtils;
50+
import org.apache.cassandra.locator.InetAddressAndPort;
51+
import org.apache.cassandra.service.StorageService;
3952
import org.assertj.core.api.Assertions;
4053

4154
import static org.apache.cassandra.config.CassandraRelevantProperties.BOOTSTRAP_SKIP_SCHEMA_CHECK;
@@ -44,9 +57,13 @@
4457
import static org.apache.cassandra.distributed.shared.ClusterUtils.assertRingIs;
4558
import static org.apache.cassandra.distributed.shared.ClusterUtils.awaitRingHealthy;
4659
import static org.apache.cassandra.distributed.shared.ClusterUtils.awaitRingJoin;
60+
import static org.apache.cassandra.distributed.shared.ClusterUtils.getDirectories;
4761
import static org.apache.cassandra.distributed.shared.ClusterUtils.getTokenMetadataTokens;
4862
import static org.apache.cassandra.distributed.shared.ClusterUtils.replaceHostAndStart;
4963
import static org.apache.cassandra.distributed.shared.ClusterUtils.stopUnchecked;
64+
import static org.apache.cassandra.gms.Gossiper.Props.DISABLE_THREAD_VALIDATION;
65+
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
66+
import static org.junit.Assert.assertFalse;
5067

5168
public class HostReplacementTest extends TestBaseImpl
5269
{
@@ -205,6 +222,91 @@ public void seedGoesDownBeforeDownHost() throws IOException
205222
}
206223
}
207224

225+
/**
226+
* Make sure that a node stuck in hibernate state due to failed replacement can retry the replacement procedure and succeed.
227+
*/
228+
@Test
229+
public void retryingFailedReplaceWithNodeInHibernateState() throws IOException
230+
{
231+
try (WithProperties properties = new WithProperties())
232+
{
233+
properties.setProperty(DISABLE_THREAD_VALIDATION, "true");
234+
235+
// given a two node cluster with one seed
236+
TokenSupplier even = TokenSupplier.evenlyDistributedTokens(2);
237+
try (Cluster cluster = Cluster.build(2)
238+
.withConfig(c -> c.with(Feature.GOSSIP, Feature.NATIVE_PROTOCOL)
239+
.set(Constants.KEY_DTEST_API_STARTUP_FAILURE_AS_SHUTDOWN, true))
240+
.withTokenSupplier(node -> even.token(node == 3 ? 2 : node))
241+
.start() )
242+
{
243+
IInvokableInstance seed = cluster.get(1);
244+
IInvokableInstance nodeToReplace = cluster.get(2);
245+
246+
setupCluster(cluster);
247+
SimpleQueryResult expectedState = nodeToReplace.coordinator().executeWithResult("SELECT * FROM " + KEYSPACE + ".tbl", ConsistencyLevel.ALL);
248+
249+
// when
250+
// stop the node to replace
251+
stopUnchecked(nodeToReplace);
252+
// wipe the node to replace
253+
getDirectories(nodeToReplace).forEach(FileUtils::deleteRecursive);
254+
255+
String toReplaceAddress = nodeToReplace.config().broadcastAddress().getAddress().getHostAddress();
256+
// set hibernate status for the node to replace on seed
257+
seed.runOnInstance(putInHibernation(toReplaceAddress));
258+
259+
// we need to fake a new host id
260+
((InstanceConfig) nodeToReplace.config()).setHostId(UUID.randomUUID());
261+
// enable autoboostrap
262+
nodeToReplace.config().set("auto_bootstrap", true);
263+
264+
// the first replacement attempt will fail because the node was announced as hibernated, so no one can contact it at startup
265+
assertThatExceptionOfType(IllegalStateException.class).isThrownBy(() -> {
266+
ClusterUtils.start(nodeToReplace, props -> {
267+
// set the replacement address
268+
props.setProperty("cassandra.replace_address", toReplaceAddress);
269+
});
270+
}).withMessageContaining("Unable to contact any seeds");
271+
272+
// then
273+
// retrying replacement will succeed as the node announced itself as shutdown before killing itself
274+
ClusterUtils.start(nodeToReplace, props -> {
275+
// set the replacement address
276+
props.setProperty("cassandra.replace_address", toReplaceAddress);
277+
});
278+
assertFalse("replaces node should be up", nodeToReplace.isShutdown());
279+
280+
// the data after replacement should be consistent
281+
awaitRingJoin(seed, nodeToReplace);
282+
awaitRingJoin(nodeToReplace, seed);
283+
284+
validateRows(seed.coordinator(), expectedState);
285+
validateRows(nodeToReplace.coordinator(), expectedState);
286+
}
287+
}
288+
}
289+
290+
private static IIsolatedExecutor.SerializableRunnable putInHibernation(String address)
291+
{
292+
return () -> {
293+
InetAddressAndPort endpoint;
294+
try
295+
{
296+
endpoint = InetAddressAndPort.getByName(address);
297+
}
298+
catch (UnknownHostException e)
299+
{
300+
throw new RuntimeException(e);
301+
}
302+
EndpointState epState = Gossiper.instance.getEndpointStateForEndpoint(endpoint);
303+
VersionedValue newStatus = StorageService.instance.valueFactory.hibernate(true);
304+
epState.addApplicationState(ApplicationState.STATUS, newStatus);
305+
epState.addApplicationState(ApplicationState.STATUS_WITH_PORT, newStatus);
306+
Gossiper.instance.handleMajorStateChange(endpoint, epState);
307+
};
308+
}
309+
208310
static void setupCluster(Cluster cluster)
209311
{
210312
fixDistributedSchemas(cluster);

0 commit comments

Comments
 (0)