|
| 1 | +/* |
| 2 | + * Test that in-progress read operations (find and getMore) are not killed, and that |
| 3 | + * their connections are not closed, during step up. |
| 4 | + */ |
| 5 | +load("jstests/libs/check_log.js"); |
| 6 | +load('jstests/libs/parallelTester.js'); |
| 7 | +load("jstests/libs/curop_helpers.js"); // for waitForCurOpByFailPoint(). |
| 8 | +load("jstests/replsets/rslib.js"); |
| 9 | + |
| 10 | +(function() { |
| 11 | + |
| 12 | +"use strict"; |
| 13 | + |
| 14 | +const testName = jsTestName(); |
| 15 | +const dbName = "test"; |
| 16 | +const collName = "coll"; |
| 17 | + |
| 18 | +const rst = new ReplSetTest({name: testName, nodes: 2}); // Two-node replica set. |
| 19 | +rst.startSet(); |
| 20 | +rst.initiateWithHighElectionTimeout(); |
| 21 | + |
| 22 | +const primary = rst.getPrimary(); |
| 23 | +const primaryDB = primary.getDB(dbName); |
| 24 | +const primaryColl = primaryDB[collName]; |
| 25 | + |
| 26 | +const secondary = rst.getSecondary(); // The node that is stepped up later in this test. |
| 27 | +const secondaryDB = secondary.getDB(dbName); |
| 28 | +const secondaryAdmin = secondary.getDB("admin"); // Used for fail points, curOp waits and serverStatus. |
| 29 | +const secondaryColl = secondaryDB[collName]; |
| 30 | +const secondaryCollNss = secondaryColl.getFullName(); // Namespace passed to the fail point helpers. |
| 31 | + |
| 32 | +TestData.dbName = dbName; // Read as TestData.dbName by the parallel shells below. |
| 33 | +TestData.collName = collName; // Read as TestData.collName by the parallel shells below. |
| 34 | + |
| 35 | +jsTestLog("1. Do a document write"); |
| 36 | +assert.commandWorked( |
| 37 | + primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}})); |
| 38 | +rst.awaitReplication(); |
| 39 | + |
| 40 | +// Open a cursor on the secondary now; its id is used for the getMore issued after step up. |
| 41 | +const cursorIdToBeReadAfterStepUp = |
| 42 | + assert.commandWorked(secondaryDB.runCommand({"find": collName, batchSize: 0})).cursor.id; |
| 43 | + |
| 44 | +jsTestLog("2. Start blocking getMore cmd before step up"); |
| 45 | +const joinGetMoreThread = startParallelShell(() => { |
| 46 | + // Open another cursor on secondary before step up. |
| 47 | + secondaryDB = db.getSiblingDB(TestData.dbName); |
| 48 | + secondaryDB.getMongo().setSlaveOk(true); |
| 49 | + |
| 50 | + const cursorIdToBeReadDuringStepUp = |
| 51 | + assert.commandWorked(secondaryDB.runCommand({"find": TestData.collName, batchSize: 0})) |
| 52 | + .cursor.id; |
| 53 | + |
| 54 | + // Enable the fail point for get more cmd. |
| 55 | + assert.commandWorked(db.adminCommand( |
| 56 | + {configureFailPoint: "waitAfterPinningCursorBeforeGetMoreBatch", mode: "alwaysOn"})); |
| 57 | + |
| 58 | + getMoreRes = assert.commandWorked(secondaryDB.runCommand( |
| 59 | + {"getMore": cursorIdToBeReadDuringStepUp, collection: TestData.collName})); |
| 60 | + assert.docEq([{_id: 0}], getMoreRes.cursor.nextBatch); |
| 61 | +}, secondary.port); |
| 62 | + |
| 63 | +// Wait for getmore cmd to reach the fail point. |
| 64 | +waitForCurOpByFailPoint( |
| 65 | + secondaryAdmin, secondaryCollNss, "waitAfterPinningCursorBeforeGetMoreBatch"); |
| 66 | + |
| 67 | +jsTestLog("2. Start blocking find cmd before step up"); |
| 68 | +const joinFindThread = startParallelShell(() => { |
| 69 | + secondaryDB = db.getSiblingDB(TestData.dbName); |
| 70 | + secondaryDB.getMongo().setSlaveOk(true); |
| 71 | + |
| 72 | + // Enable the fail point for find cmd. |
| 73 | + assert.commandWorked( |
| 74 | + db.adminCommand({configureFailPoint: "waitInFindBeforeMakingBatch", mode: "alwaysOn"})); |
| 75 | + |
| 76 | + const findRes = assert.commandWorked(secondaryDB.runCommand({"find": TestData.collName})); |
| 77 | + assert.docEq([{_id: 0}], findRes.cursor.firstBatch); |
| 78 | +}, secondary.port); |
| 79 | + |
| 80 | +// Wait for find cmd to reach the fail point. |
| 81 | +waitForCurOpByFailPoint(secondaryAdmin, secondaryCollNss, "waitInFindBeforeMakingBatch"); |
| 82 | + |
| 83 | +jsTestLog("3. Make primary step up"); |
| 84 | +const joinStepUpThread = startParallelShell(() => { |
| 85 | + assert.commandWorked(db.adminCommand({"replSetStepUp": 100, "force": true})); |
| 86 | +}, secondary.port); // Runs against the secondary, i.e. the node being stepped up. |
| 87 | + |
| 88 | +// Wait until the step up has started to kill user operations. |
| 89 | +checkLog.contains(secondary, "Starting to kill user operations"); |
| 90 | + |
| 91 | +// Enable "waitAfterCommandFinishesExecution" fail point to make sure the find and get more |
| 92 | +// commands on database 'test' do not complete before step up. |
| 93 | +setFailPoint(secondaryAdmin, |
| 94 | + "waitAfterCommandFinishesExecution", |
| 95 | + {ns: secondaryCollNss, commands: ["find", "getMore"]}); |
| 96 | + |
| 97 | +jsTestLog("4. Disable fail points"); |
| 98 | +clearFailPoint(secondaryAdmin, "waitInFindBeforeMakingBatch"); |
| 99 | +clearFailPoint(secondaryAdmin, "waitAfterPinningCursorBeforeGetMoreBatch"); |
| 100 | + |
| 101 | +// Wait until the secondary has transitioned to PRIMARY state. |
| 102 | +joinStepUpThread(); |
| 103 | +rst.waitForState(secondary, ReplSetTest.State.PRIMARY); |
| 104 | + |
| 105 | +// We don't want to check if we have reached "waitAfterCommandFinishesExecution" fail point |
| 106 | +// because we already know that the secondary has stepped up successfully. This implies that |
| 107 | +// the find and get more commands are still running even after the node stepped up. |
| 108 | +clearFailPoint(secondaryAdmin, "waitAfterCommandFinishesExecution"); |
| 109 | + |
| 110 | +// Wait for the find and getMore threads to join. |
| 111 | +joinGetMoreThread(); |
| 112 | +joinFindThread(); |
| 113 | + |
| 114 | +jsTestLog("5. Start get more cmd after step up"); |
| 115 | +const getMoreRes = assert.commandWorked( |
| 116 | + secondaryDB.runCommand({"getMore": cursorIdToBeReadAfterStepUp, collection: collName})); |
| 117 | +assert.docEq([{_id: 0}], getMoreRes.cursor.nextBatch); |
| 118 | + |
| 119 | +// Validate that no operations got killed on step up and no network disconnection happened due |
| 120 | +// to failed unacknowledged operations. |
| 121 | +const replMetrics = |
| 122 | + assert.commandWorked(secondaryAdmin.adminCommand({serverStatus: 1})).metrics.repl; |
| 123 | +assert.eq(replMetrics.stateTransition.lastStateTransition, "stepUp"); |
| 124 | +assert.eq(replMetrics.stateTransition.userOperationsKilled, 0); |
| 125 | +// Should account for at least the find and getMore commands issued before step up. |
| 126 | +assert.gte(replMetrics.stateTransition.userOperationsRunning, 2); |
| 127 | +assert.eq(replMetrics.network.notMasterUnacknowledgedWrites, 0); |
| 128 | + |
| 129 | +rst.stopSet(); |
| 130 | +})(); |
0 commit comments