@@ -61,6 +61,7 @@ function RollbackTest(name = "RollbackTest", replSet) {
61
61
const SIGTERM = 15 ;
62
62
const kNumDataBearingNodes = 3 ;
63
63
const kElectableNodes = 2 ;
64
+ const kForeverSecs = 24 * 60 * 60 ;
64
65
65
66
let rst ;
66
67
let curPrimary ;
@@ -236,6 +237,9 @@ function RollbackTest(name = "RollbackTest", replSet) {
236
237
`may prevent a rollback here.` ) ;
237
238
}
238
239
240
+ // Unfreeze the node if it was previously frozen, so that it can run for election.
241
+ assert . commandWorked ( curSecondary . adminCommand ( { replSetFreeze : 0 } ) ) ;
242
+
239
243
// Ensure that the tiebreaker node is connected to the other nodes. We must do this after
240
244
// we are sure that rollback has completed on the rollback node.
241
245
tiebreakerNode . reconnect ( [ curPrimary , curSecondary ] ) ;
@@ -369,6 +373,12 @@ function RollbackTest(name = "RollbackTest", replSet) {
369
373
this . transitionToSyncSourceOperationsDuringRollback = function ( ) {
370
374
transitionIfAllowed ( State . kSyncSourceOpsDuringRollback ) ;
371
375
376
+ // If the nodes are restarted after the rollback node is able to roll back successfully and
377
+ // catch up to curPrimary's oplog, then the rollback node can become the new primary.
378
+ // If so, it can lead to unplanned state transitions, like unconditional step down, during
379
+ // the test. To avoid those problems, prevent the rollback node from starting an election.
380
+ assert . commandWorked ( curSecondary . adminCommand ( { replSetFreeze : kForeverSecs } ) ) ;
381
+
372
382
log ( `Reconnecting the secondary ${ curSecondary . host } so it'll go into rollback` ) ;
373
383
// Reconnect the rollback node to the current primary, which is the node we want to sync
374
384
// from. If we reconnect to both the current primary and the tiebreaker node, the rollback
@@ -431,9 +441,39 @@ function RollbackTest(name = "RollbackTest", replSet) {
431
441
log ( `Restarting node ${ hostName } ` ) ;
432
442
rst . start ( nodeId , startOptions , true /* restart */ ) ;
433
443
434
- // Ensure that the primary is ready to take operations before continuing. If both nodes are
435
- // connected to the tiebreaker node, the primary may switch.
444
+ // Freeze the node if the restarted node is the rollback node.
445
+ if ( curState === State . kSyncSourceOpsDuringRollback &&
446
+ rst . getNodeId ( curSecondary ) === nodeId ) {
447
+ assert . soon ( ( ) => {
448
+ // Try stepping down the rollback node if it became the primary after its
449
+ // restart, as it might have caught up with the original primary.
450
+ curSecondary . adminCommand ( { "replSetStepDown" : kForeverSecs , "force" : true } ) ;
451
+ try {
452
+ // Prevent the rollback node from running an election. There is a chance that this
453
+ // node might have started running an election or become primary after
454
+ // 'replSetStepDown' cmd, so 'replSetFreeze' cmd can fail.
455
+ assert . commandWorked (
456
+ curSecondary . adminCommand ( { "replSetFreeze" : kForeverSecs } ) ) ;
457
+ return true ;
458
+ } catch ( e ) {
459
+ if ( e . code === ErrorCodes . NotSecondary ) {
460
+ return false ;
461
+ }
462
+ throw e ;
463
+ }
464
+ } , `Failed to run replSetFreeze cmd on ${ curSecondary . host } ` ) ;
465
+ }
466
+
467
+ const oldPrimary = curPrimary ;
468
+ // Wait for the new primary to be elected and ready to take operations before continuing.
436
469
curPrimary = rst . getPrimary ( ) ;
470
+
471
+ // The primary can change after a node restart only if all 3 nodes are connected to each
472
+ // other.
473
+ if ( curState !== State . kSteadyStateOps ) {
474
+ assert . eq ( curPrimary , oldPrimary ) ;
475
+ }
476
+
437
477
curSecondary = rst . getSecondary ( ) ;
438
478
assert . neq ( curPrimary , curSecondary ) ;
439
479
} ;
0 commit comments