Skip to content

Commit 35615d9

Browse files
authored
RATIS-2261. Intermittent failure in TestRaftSnapshotWithGrpc.testInstallSnapshotDuringBootstrap. (#1287)
1 parent d11604c commit 35615d9

File tree

5 files changed

+54
-72
lines changed

5 files changed

+54
-72
lines changed

ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java

Lines changed: 36 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,12 @@
4949
import org.apache.ratis.util.JavaUtils;
5050
import org.apache.ratis.util.LifeCycle;
5151
import org.apache.ratis.util.Slf4jUtils;
52-
import org.junit.jupiter.api.AfterEach;
5352
import org.junit.jupiter.api.Assertions;
54-
import org.junit.jupiter.api.BeforeEach;
5553
import org.junit.jupiter.api.Test;
5654
import org.slf4j.Logger;
5755
import org.slf4j.LoggerFactory;
5856

5957
import java.io.File;
60-
import java.io.IOException;
6158
import java.util.Arrays;
6259
import java.util.List;
6360
import java.util.Optional;
@@ -67,11 +64,18 @@
6764
import org.apache.ratis.thirdparty.com.codahale.metrics.Timer;
6865
import org.slf4j.event.Level;
6966

70-
public abstract class RaftSnapshotBaseTest extends BaseTest {
67+
public abstract class RaftSnapshotBaseTest<CLUSTER extends MiniRaftCluster>
68+
extends BaseTest
69+
implements MiniRaftCluster.Factory.Get<CLUSTER> {
7170
{
7271
Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG);
7372
Slf4jUtils.setLogLevel(RaftLog.LOG, Level.DEBUG);
74-
Slf4jUtils.setLogLevel(RaftClient.LOG, Level.DEBUG);
73+
74+
final RaftProperties p = getProperties();
75+
p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, SimpleStateMachine4Testing.class, StateMachine.class);
76+
RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold(p, SNAPSHOT_TRIGGER_THRESHOLD);
77+
RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(p, true);
78+
RaftServerConfigKeys.LeaderElection.setMemberMajorityAdd(p, true);
7579
}
7680

7781
static final Logger LOG = LoggerFactory.getLogger(RaftSnapshotBaseTest.class);
@@ -119,38 +123,20 @@ public static void assertLogContent(RaftServer.Division server, boolean isLeader
119123
}
120124
}
121125

122-
private MiniRaftCluster cluster;
123-
124-
public abstract MiniRaftCluster.Factory<?> getFactory();
125-
126-
@BeforeEach
127-
public void setup() throws IOException {
128-
final RaftProperties prop = new RaftProperties();
129-
prop.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY,
130-
SimpleStateMachine4Testing.class, StateMachine.class);
131-
RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold(
132-
prop, SNAPSHOT_TRIGGER_THRESHOLD);
133-
RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(prop, true);
134-
this.cluster = getFactory().newCluster(1, prop);
135-
cluster.start();
136-
}
137-
138-
@AfterEach
139-
public void tearDown() {
140-
if (cluster != null) {
141-
cluster.shutdown();
142-
}
143-
}
144-
145126
/**
146127
* Keep generating writing traffic and make sure snapshots are taken.
147128
* We then restart the whole raft peer and check if it can correctly load
148129
* snapshots + raft log.
149130
*/
150131
@Test
151132
public void testRestartPeer() throws Exception {
152-
RaftTestUtil.waitForLeader(cluster);
153-
final RaftPeerId leaderId = cluster.getLeader().getId();
133+
runWithNewCluster(1, this::runTestRestartPeer);
134+
135+
}
136+
137+
void runTestRestartPeer(CLUSTER cluster) throws Exception {
138+
LOG.info("runTestRestartPeer");
139+
final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId();
154140
int i = 0;
155141
try(final RaftClient client = cluster.createClient(leaderId)) {
156142
for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) {
@@ -180,7 +166,7 @@ public void testRestartPeer() throws Exception {
180166

181167
public static boolean exists(File f) {
182168
if (f.exists()) {
183-
LOG.info("File exists: " + f);
169+
LOG.info("File exists: {}", f);
184170
return true;
185171
}
186172
return false;
@@ -193,11 +179,15 @@ public static boolean exists(File f) {
193179
*/
194180
@Test
195181
public void testBasicInstallSnapshot() throws Exception {
182+
runWithNewCluster(1, this::runTestBasicInstallSnapshot);
183+
}
184+
185+
void runTestBasicInstallSnapshot(CLUSTER cluster) throws Exception {
186+
LOG.info("runTestBasicInstallSnapshot");
196187
final List<LogSegmentPath> logs;
197188
int i = 0;
198189
try {
199-
RaftTestUtil.waitForLeader(cluster);
200-
final RaftPeerId leaderId = cluster.getLeader().getId();
190+
final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId();
201191

202192
try(final RaftClient client = cluster.createClient(leaderId)) {
203193
for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) {
@@ -236,16 +226,14 @@ public void testBasicInstallSnapshot() throws Exception {
236226
Assertions.assertTrue(client.io().send(new SimpleMessage("m" + i)).isSuccess());
237227
}
238228

239-
// add two more peers
240-
String[] newPeers = new String[]{"s3", "s4"};
241-
MiniRaftCluster.PeerChanges change = cluster.addNewPeers(
242-
newPeers, true, false);
229+
// add a new peer
230+
final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(1, true, true);
243231
// trigger setConfiguration
244232
RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf),
245233
peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray())));
246234

247-
for (String newPeer : newPeers) {
248-
final RaftServer.Division s = cluster.getDivision(RaftPeerId.valueOf(newPeer));
235+
for (RaftPeer newPeer : change.newPeers) {
236+
final RaftServer.Division s = cluster.getDivision(newPeer.getId());
249237
SimpleStateMachine4Testing simpleStateMachine = SimpleStateMachine4Testing.get(s);
250238
Assertions.assertSame(LifeCycle.State.RUNNING, simpleStateMachine.getLifeCycleState());
251239
}
@@ -275,6 +263,11 @@ public void testBasicInstallSnapshot() throws Exception {
275263
*/
276264
@Test
277265
public void testInstallSnapshotDuringBootstrap() throws Exception {
266+
runWithNewCluster(1, this::runTestInstallSnapshotDuringBootstrap);
267+
}
268+
269+
void runTestInstallSnapshotDuringBootstrap(CLUSTER cluster) throws Exception {
270+
LOG.info("runTestInstallSnapshotDuringBootstrap");
278271
int i = 0;
279272
try {
280273
RaftTestUtil.waitForLeader(cluster);
@@ -299,16 +292,14 @@ public void testInstallSnapshotDuringBootstrap() throws Exception {
299292

300293
assertLeaderContent(cluster);
301294

302-
// add two more peers
303-
String[] newPeers = new String[]{"s3", "s4"};
304-
MiniRaftCluster.PeerChanges change = cluster.addNewPeers(
305-
newPeers, true, false);
295+
// add a new peer
296+
final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(1, true, true);
306297
// trigger setConfiguration
307298
RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf),
308299
peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray())));
309300

310-
for (String newPeer : newPeers) {
311-
final RaftServer.Division s = cluster.getDivision(RaftPeerId.valueOf(newPeer));
301+
for (RaftPeer newPeer : change.newPeers) {
302+
final RaftServer.Division s = cluster.getDivision(newPeer.getId());
312303
SimpleStateMachine4Testing simpleStateMachine = SimpleStateMachine4Testing.get(s);
313304
Assertions.assertSame(LifeCycle.State.RUNNING, simpleStateMachine.getLifeCycleState());
314305
}

ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ private void put(LogEntryProto entry) {
210210
@Override
211211
public synchronized void initialize(RaftServer server, RaftGroupId raftGroupId,
212212
RaftStorage raftStorage) throws IOException {
213-
LOG.info("Initializing " + this);
213+
LOG.info("Initializing {}", this);
214214
this.groupId = raftGroupId;
215215
getLifeCycle().startAndTransition(() -> {
216216
super.initialize(server, raftGroupId, raftStorage);
@@ -233,7 +233,10 @@ public synchronized void pause() {
233233

234234
@Override
235235
public synchronized void reinitialize() throws IOException {
236-
LOG.info("Reinitializing " + this);
236+
LOG.info("Reinitializing {}", this);
237+
indexMap.clear();
238+
dataMap.clear();
239+
237240
loadSnapshot(storage.getLatestSnapshot());
238241
if (getLifeCycleState() == LifeCycle.State.PAUSED) {
239242
getLifeCycle().transition(LifeCycle.State.STARTING);
@@ -328,14 +331,14 @@ public CompletableFuture<Message> query(Message request) {
328331
final String string = request.getContent().toStringUtf8();
329332
Exception exception;
330333
try {
331-
LOG.info("query " + string);
334+
LOG.info("query {}", string);
332335
final LogEntryProto entry = dataMap.get(string);
333336
if (entry != null) {
334337
return CompletableFuture.completedFuture(Message.valueOf(entry.toByteString()));
335338
}
336339
exception = new IndexOutOfBoundsException(getId() + ": LogEntry not found for query " + string);
337340
} catch (Exception e) {
338-
LOG.warn("Failed request " + request, e);
341+
LOG.warn("Failed request {}", request, e);
339342
exception = e;
340343
}
341344
return JavaUtils.completeExceptionally(new StateMachineException(

ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,16 @@
2020
import java.util.Optional;
2121

2222
import org.apache.ratis.metrics.LongCounter;
23-
import org.apache.ratis.server.impl.MiniRaftCluster;
2423
import org.apache.ratis.metrics.MetricRegistries;
2524
import org.apache.ratis.metrics.MetricRegistryInfo;
2625
import org.apache.ratis.metrics.RatisMetricRegistry;
2726
import org.apache.ratis.server.RaftServer;
2827
import org.apache.ratis.statemachine.RaftSnapshotBaseTest;
29-
import org.apache.ratis.test.tag.Flaky;
3028
import org.junit.jupiter.api.Assertions;
3129

32-
@Flaky("RATIS-2261")
33-
public class TestRaftSnapshotWithGrpc extends RaftSnapshotBaseTest {
34-
@Override
35-
public MiniRaftCluster.Factory<?> getFactory() {
36-
return MiniRaftClusterWithGrpc.FACTORY;
37-
}
38-
30+
public class TestRaftSnapshotWithGrpc
31+
extends RaftSnapshotBaseTest<MiniRaftClusterWithGrpc>
32+
implements MiniRaftClusterWithGrpc.FactoryGet {
3933
@Override
4034
protected void verifyInstallSnapshotMetric(RaftServer.Division leader) {
4135
MetricRegistryInfo info = new MetricRegistryInfo(leader.getMemberId().toString(),

ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -17,12 +17,9 @@
1717
*/
1818
package org.apache.ratis.netty;
1919

20-
import org.apache.ratis.server.impl.MiniRaftCluster;
2120
import org.apache.ratis.statemachine.RaftSnapshotBaseTest;
2221

23-
public class TestRaftSnapshotWithNetty extends RaftSnapshotBaseTest {
24-
@Override
25-
public MiniRaftCluster.Factory<?> getFactory() {
26-
return MiniRaftClusterWithNetty.FACTORY;
27-
}
22+
public class TestRaftSnapshotWithNetty
23+
extends RaftSnapshotBaseTest<MiniRaftClusterWithNetty>
24+
implements MiniRaftClusterWithNetty.FactoryGet {
2825
}

ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -17,12 +17,9 @@
1717
*/
1818
package org.apache.ratis.server.simulation;
1919

20-
import org.apache.ratis.server.impl.MiniRaftCluster;
2120
import org.apache.ratis.statemachine.RaftSnapshotBaseTest;
2221

23-
public class TestRaftSnapshotWithSimulatedRpc extends RaftSnapshotBaseTest {
24-
@Override
25-
public MiniRaftCluster.Factory<?> getFactory() {
26-
return MiniRaftClusterWithSimulatedRpc.FACTORY;
27-
}
22+
public class TestRaftSnapshotWithSimulatedRpc
23+
extends RaftSnapshotBaseTest<MiniRaftClusterWithSimulatedRpc>
24+
implements MiniRaftClusterWithSimulatedRpc.FactoryGet {
2825
}

0 commit comments

Comments
 (0)