@@ -96,6 +96,7 @@ public BookieAutoRecoveryTest() throws IOException, KeeperException,
9696
9797 @ Override
9898 public void setUp () throws Exception {
99+ LOG .info ("===> Start setUp" );
99100 super .setUp ();
100101 baseConf .setMetadataServiceUri (zkUtil .getMetadataServiceUri ());
101102 baseClientConf .setMetadataServiceUri (zkUtil .getMetadataServiceUri ());
@@ -117,10 +118,12 @@ public void setUp() throws Exception {
117118 mFactory = metadataClientDriver .getLedgerManagerFactory ();
118119 underReplicationManager = mFactory .newLedgerUnderreplicationManager ();
119120 ledgerManager = mFactory .newLedgerManager ();
121+ LOG .info ("===> Finished setUp" );
120122 }
121123
122124 @ Override
123125 public void tearDown () throws Exception {
126+ LOG .info ("===> Start tearDown" );
124127 super .tearDown ();
125128
126129 if (null != underReplicationManager ) {
@@ -138,6 +141,7 @@ public void tearDown() throws Exception {
138141 if (null != scheduler ) {
139142 scheduler .shutdown ();
140143 }
144+ LOG .info ("===> Finished tearDown" );
141145 }
142146
143147 /**
@@ -146,7 +150,7 @@ public void tearDown() throws Exception {
146150 */
147151 @ Test
148152 public void testOpenLedgers () throws Exception {
149- LOG .info ("===> Testing open ledgers " );
153+ LOG .info ("===> Start testOpenLedgers " );
150154 List <LedgerHandle > listOfLedgerHandle = createLedgersAndAddEntries (1 , 5 );
151155 LedgerHandle lh = listOfLedgerHandle .get (0 );
152156 int ledgerReplicaIndex = 0 ;
@@ -156,20 +160,16 @@ public void testOpenLedgers() throws Exception {
156160 ledgerReplicaIndex = getReplicaIndexInLedger (lh , replicaToKillAddr );
157161
158162 CountDownLatch latch = new CountDownLatch (1 );
159- LOG .info ("===> 1 Watching on urLedgerPath:" + urLedgerZNode
160- + " to know the status of rereplication process" );
161163 assertNull ("UrLedger already exists!" ,
162164 watchUrLedgerNode (urLedgerZNode , latch ));
163165
164166 LOG .info ("Killing Bookie :" + replicaToKillAddr );
165167 killBookie (replicaToKillAddr );
166168
167169 // waiting to publish urLedger znode by Auditor
168- LOG .info ("===> 2 Watching on urLedgerPath:" + urLedgerZNode
169- + " to know the status of rereplication process" );
170170 latch .await ();
171171 latch = new CountDownLatch (1 );
172- LOG .info ("===> 3 Watching on urLedgerPath:" + urLedgerZNode
172+ LOG .info ("Watching on urLedgerPath:" + urLedgerZNode
173173 + " to know the status of rereplication process" );
174174 assertNotNull ("UrLedger doesn't exists!" ,
175175 watchUrLedgerNode (urLedgerZNode , latch ));
@@ -184,17 +184,14 @@ public void testOpenLedgers() throws Exception {
184184 LOG .debug ("Waiting to finish the replication of failed bookie : "
185185 + replicaToKillAddr );
186186 }
187- LOG .info ("===> 4 Watching on urLedgerPath:" + urLedgerZNode
188- + " to know the status of rereplication process" );
189187 latch .await ();
190- LOG .info ("===> 5 Watching on urLedgerPath:" + urLedgerZNode
191- + " to know the status of rereplication process" );
188+
192189 // grace period to update the urledger metadata in zookeeper
193190 LOG .info ("Waiting to update the urledger metadata in zookeeper" );
194191
195192 verifyLedgerEnsembleMetadataAfterReplication (newBookieServer ,
196193 listOfLedgerHandle .get (0 ), ledgerReplicaIndex );
197- LOG .info ("===> Finished test open ledgers " );
194+ LOG .info ("===> Finished testOpenLedgers " );
198195 }
199196
200197 /**
@@ -203,41 +200,104 @@ public void testOpenLedgers() throws Exception {
203200 */
204201 @ Test
205202 public void testClosedLedgers () throws Exception {
206- LOG .info ("===> Testing close ledgers " );
203+ LOG .info ("===> Start testClosedLedgers " );
207204 List <Integer > listOfReplicaIndex = new ArrayList <Integer >();
208205 List <LedgerHandle > listOfLedgerHandle = createLedgersAndAddEntries (1 , 5 );
209206 closeLedgers (listOfLedgerHandle );
210207 LedgerHandle lhandle = listOfLedgerHandle .get (0 );
211208 int ledgerReplicaIndex = 0 ;
212209 BookieId replicaToKillAddr = lhandle .getLedgerMetadata ().getAllEnsembles ().get (0L ).get (0 );
213210
214- String urLedgerZNode = null ;
215211 CountDownLatch latch = new CountDownLatch (listOfLedgerHandle .size ());
216212 for (LedgerHandle lh : listOfLedgerHandle ) {
217213 ledgerReplicaIndex = getReplicaIndexInLedger (lh , replicaToKillAddr );
218214 listOfReplicaIndex .add (ledgerReplicaIndex );
219- urLedgerZNode = getUrLedgerZNode (lh );
220- LOG .info ("===> Watching on urLedgerPath:" + urLedgerZNode
215+ assertNull ("UrLedger already exists!" ,
216+ watchUrLedgerNode (getUrLedgerZNode (lh ), latch ));
217+ }
218+
219+ LOG .info ("Killing Bookie :" + replicaToKillAddr );
220+ killBookie (replicaToKillAddr );
221+
222+ // waiting to publish urLedger znode by Auditor
223+ latch .await ();
224+
225+ // Again watching the urLedger znode to know the replication status
226+ latch = new CountDownLatch (listOfLedgerHandle .size ());
227+ for (LedgerHandle lh : listOfLedgerHandle ) {
228+ String urLedgerZNode = getUrLedgerZNode (lh );
229+ LOG .info ("Watching on urLedgerPath:" + urLedgerZNode
221230 + " to know the status of rereplication process" );
231+ assertNotNull ("UrLedger doesn't exists!" ,
232+ watchUrLedgerNode (urLedgerZNode , latch ));
233+ }
234+
235+ // starting the replication service, so that it will be able to act as
236+ // the target bookie
237+ startNewBookie ();
238+ int newBookieIndex = lastBookieIndex ();
239+ BookieServer newBookieServer = serverByIndex (newBookieIndex );
240+
241+ if (LOG .isDebugEnabled ()) {
242+ LOG .debug ("Waiting to finish the replication of failed bookie : "
243+ + replicaToKillAddr );
244+ }
245+
246+ // waiting to finish replication
247+ latch .await ();
248+
249+ // grace period to update the urledger metadata in zookeeper
250+ LOG .info ("Waiting to update the urledger metadata in zookeeper" );
251+
252+ for (int index = 0 ; index < listOfLedgerHandle .size (); index ++) {
253+ verifyLedgerEnsembleMetadataAfterReplication (newBookieServer ,
254+ listOfLedgerHandle .get (index ),
255+ listOfReplicaIndex .get (index ));
256+ }
257+ LOG .info ("===> Finished testClosedLedgers" );
258+ }
259+
260+ /**
261+ * Test stopping the replication service while replication is in progress.
262+ * Considers the case where an exception shuts down the Auditor and RW processes;
263+ * after restarting, they should be able to finish the re-replication activities.
264+ */
265+ @ Test
266+ public void testStopWhileReplicationInProgress () throws Exception {
267+ LOG .info ("===> Start testStopWhileReplicationInProgress" );
268+ int numberOfLedgers = 2 ;
269+ List <Integer > listOfReplicaIndex = new ArrayList <Integer >();
270+ List <LedgerHandle > listOfLedgerHandle = createLedgersAndAddEntries (
271+ numberOfLedgers , 5 );
272+ closeLedgers (listOfLedgerHandle );
273+ LedgerHandle handle = listOfLedgerHandle .get (0 );
274+ BookieId replicaToKillAddr = handle .getLedgerMetadata ().getAllEnsembles ().get (0L ).get (0 );
275+ LOG .info ("Killing Bookie:" + replicaToKillAddr );
276+
277+ // For each ledger there will be two events: the urLedger is created and,
278+ // after rereplication, the urLedger is deleted
279+ CountDownLatch latch = new CountDownLatch (listOfLedgerHandle .size ());
280+ for (int i = 0 ; i < listOfLedgerHandle .size (); i ++) {
281+ final String urLedgerZNode = getUrLedgerZNode (listOfLedgerHandle
282+ .get (i ));
222283 assertNull ("UrLedger already exists!" ,
223284 watchUrLedgerNode (urLedgerZNode , latch ));
285+ int replicaIndexInLedger = getReplicaIndexInLedger (
286+ listOfLedgerHandle .get (i ), replicaToKillAddr );
287+ listOfReplicaIndex .add (replicaIndexInLedger );
224288 }
225289
226290 LOG .info ("Killing Bookie :" + replicaToKillAddr );
227- LOG .info ("===> 2 Watching on urLedgerPath:" + urLedgerZNode
228- + " to know the status of rereplication process" );
229291 killBookie (replicaToKillAddr );
230- LOG .info ("===> 3 Watching on urLedgerPath:" + urLedgerZNode
231- + " to know the status of rereplication process" );
232292
233293 // waiting to publish urLedger znode by Auditor
234294 latch .await ();
235295
236296 // Again watching the urLedger znode to know the replication status
237297 latch = new CountDownLatch (listOfLedgerHandle .size ());
238298 for (LedgerHandle lh : listOfLedgerHandle ) {
239- urLedgerZNode = getUrLedgerZNode (lh );
240- LOG .info ("===> 4 Watching on urLedgerPath:" + urLedgerZNode
299+ String urLedgerZNode = getUrLedgerZNode (lh );
300+ LOG .info ("Watching on urLedgerPath:" + urLedgerZNode
241301 + " to know the status of rereplication process" );
242302 assertNotNull ("UrLedger doesn't exists!" ,
243303 watchUrLedgerNode (urLedgerZNode , latch ));
@@ -253,13 +313,21 @@ public void testClosedLedgers() throws Exception {
253313 LOG .debug ("Waiting to finish the replication of failed bookie : "
254314 + replicaToKillAddr );
255315 }
316+ while (true ) {
317+ if (latch .getCount () < numberOfLedgers || latch .getCount () <= 0 ) {
318+ stopReplicationService ();
319+ LOG .info ("Latch Count is:" + latch .getCount ());
320+ break ;
321+ }
322+ // grace period to take breath
323+ Thread .sleep (1000 );
324+ }
256325
257- // waiting to finish replication
258- LOG . info ( "===> 5 Watching on urLedgerPath:" + urLedgerZNode
259- + " to know the status of rereplication process " );
326+ startReplicationService ();
327+
328+ LOG . info ( "Waiting to finish rereplication processes " );
260329 latch .await ();
261- LOG .info ("===> 6 Watching on urLedgerPath:" + urLedgerZNode
262- + " to know the status of rereplication process" );
330+
263331 // grace period to update the urledger metadata in zookeeper
264332 LOG .info ("Waiting to update the urledger metadata in zookeeper" );
265333
@@ -268,7 +336,7 @@ public void testClosedLedgers() throws Exception {
268336 listOfLedgerHandle .get (index ),
269337 listOfReplicaIndex .get (index ));
270338 }
271- LOG .info ("===> Finished test close ledgers " );
339+ LOG .info ("===> Finished testStopWhileReplicationInProgress " );
272340 }
273341
274342 /**
@@ -278,7 +346,7 @@ public void testClosedLedgers() throws Exception {
278346 */
279347 @ Test
280348 public void testNoSuchLedgerExists () throws Exception {
281- LOG .info ("===> Starting testNoSuchLedgerExists" );
349+ LOG .info ("===> Start testNoSuchLedgerExists" );
282350 List <LedgerHandle > listOfLedgerHandle = createLedgersAndAddEntries (2 , 5 );
283351 CountDownLatch latch = new CountDownLatch (listOfLedgerHandle .size ());
284352 for (LedgerHandle lh : listOfLedgerHandle ) {
@@ -324,7 +392,7 @@ public void testNoSuchLedgerExists() throws Exception {
324392 */
325393 @ Test
326394 public void testEmptyLedgerLosesQuorumEventually () throws Exception {
327- LOG .info ("===> Starting testEmptyLedgerLosesQuorumEventually" );
395+ LOG .info ("===> Start testEmptyLedgerLosesQuorumEventually" );
328396 LedgerHandle lh = bkc .createLedger (3 , 2 , 2 , DigestType .CRC32 , PASSWD );
329397 CountDownLatch latch = new CountDownLatch (1 );
330398 String urZNode = getUrLedgerZNode (lh );
@@ -375,7 +443,7 @@ public void testEmptyLedgerLosesQuorumEventually() throws Exception {
375443 @ Test
376444 public void testLedgerMetadataContainsIpAddressAsBookieID ()
377445 throws Exception {
378- LOG .info ("===> Starting testLedgerMetadataContainsIpAddressAsBookieID" );
446+ LOG .info ("===> Start testLedgerMetadataContainsIpAddressAsBookieID" );
379447 stopBKCluster ();
380448 bkc = new BookKeeperTestClient (baseClientConf );
381449 // start bookie with useHostNameAsBookieID=false, as old bookie
@@ -451,7 +519,7 @@ public void testLedgerMetadataContainsIpAddressAsBookieID()
451519 @ Test
452520 public void testLedgerMetadataContainsHostNameAsBookieID ()
453521 throws Exception {
454- LOG .info ("===> Starting testLedgerMetadataContainsHostNameAsBookieID" );
522+ LOG .info ("===> Start testLedgerMetadataContainsHostNameAsBookieID" );
455523 stopBKCluster ();
456524
457525 bkc = new BookKeeperTestClient (baseClientConf );
0 commit comments