Skip to content

Commit d9cc291

Browse files
ruchitharajaghatta authored and MongoDB Bot committed
SERVER-97775: Write rolled back oplog entry to rollback file for debugging (#31736)
GitOrigin-RevId: 132bad3
1 parent 7379d61 commit d9cc291

File tree

8 files changed

+186
-79
lines changed

8 files changed

+186
-79
lines changed
Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
/**
2+
* Tests that a rollback directory is created for oplog entries during replica set rollback, and
3+
* verifies that the rolled back oplogs are written to that directory.
4+
*
5+
* @tags: [requires_persistence, requires_fcv_81]
6+
*/
7+
8+
import {RollbackTest} from "jstests/replsets/libs/rollback_test.js";
9+
10+
/**
 * Runs one rollback scenario and checks whether rolled-back oplog entries were written to the
 * rollback directory of the node that rolled back.
 *
 * The `createRollbackDataFiles` server parameter is set to `shouldCreateRollbackFiles` on both
 * nodes used by the test. Two documents are inserted on the isolated primary so that they get
 * rolled back, and the existence of `<dbpath>/rollback/local.oplog.rs` is then asserted to equal
 * `shouldCreateRollbackFiles`. When files are expected, their contents are read back and must
 * contain the first two oplog entries captured before the rollback.
 *
 * @param {boolean} shouldCreateRollbackFiles value for `createRollbackDataFiles`, and also the
 *     expected result of the directory-existence check.
 */
function runRollbackOplogsTest(shouldCreateRollbackFiles) {
11+
jsTestLog("Testing createRollbackDataFiles = " + shouldCreateRollbackFiles);
12+
const rollbackTest = new RollbackTest(jsTestName());
13+
// The current primary is the node that will be isolated and later roll back.
const rollbackNode = rollbackTest.getPrimary();
14+
const secondTermPrimary = rollbackTest.getSecondary();
15+
// Apply the parameter under test to both nodes.
assert.commandWorked(rollbackNode.getDB("admin").adminCommand(
16+
{setParameter: 1, createRollbackDataFiles: shouldCreateRollbackFiles}));
17+
assert.commandWorked(secondTermPrimary.getDB("admin").adminCommand(
18+
{setParameter: 1, createRollbackDataFiles: shouldCreateRollbackFiles}));
19+
20+
const dbName = "test";
21+
const collName = "rollbackColl";
22+
23+
// Isolate the rollbackNode (current primary node) and insert documents (which will be rolled
24+
// back).
25+
rollbackTest.transitionToRollbackOperations();
26+
assert.commandWorked(rollbackNode.getDB(dbName)[collName].insert({"a": 1}));
27+
assert.commandWorked(rollbackNode.getDB(dbName)[collName].insert({"a": 2}));
28+
const rst = rollbackTest.getTestFixture();
29+
// Snapshot the isolated node's oplog before the rollback happens.
// NOTE(review): presumably findOplog returns newest entries first, so indices 0 and 1 below are
// the two inserts above — TODO confirm against ReplSetTest.findOplog.
const oplogsToRollback = rst.findOplog(rollbackNode, {}).toArray();
30+
31+
// Elect the previous secondary as the new primary.
32+
rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
33+
assert.commandWorked(secondTermPrimary.getDB(dbName)[collName].insert({"b": 1}));
34+
35+
// Reconnect the isolated node and rollback should start.
36+
rollbackTest.transitionToSyncSourceOperationsDuringRollback();
37+
// Transition back to steady state.
38+
rollbackTest.transitionToSteadyStateOperations();
39+
40+
// Check for rollback files.
41+
const rollbackNodePath = rst.getDbPath(rollbackNode);
42+
const oplogRollbackDir = rollbackNodePath + "/rollback/local.oplog.rs";
43+
// The directory must exist exactly when rollback data files were requested.
assert.eq(pathExists(oplogRollbackDir), shouldCreateRollbackFiles, oplogRollbackDir);
44+
if (shouldCreateRollbackFiles) {
45+
const listRollbackFiles = listFiles(oplogRollbackDir);
46+
let oplogsRolledBack = [];
47+
// Gather the contents of every file in the rollback directory into one array.
for (let i = 0; i < listRollbackFiles.length; i++) {
48+
oplogsRolledBack = oplogsRolledBack.concat(_readDumpFile(listRollbackFiles[i].name));
49+
}
50+
// Only the first two captured oplog entries are checked against the rollback files.
assert.contains(oplogsToRollback[0], oplogsRolledBack);
51+
assert.contains(oplogsToRollback[1], oplogsRolledBack);
52+
}
53+
// Shut down the replica set so the next invocation starts from a fresh fixture.
rst.stopSet();
54+
}
55+
runRollbackOplogsTest(true);
56+
runRollbackOplogsTest(false);

src/mongo/db/repl/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -780,6 +780,7 @@ mongo_cc_library(
780780
":oplog_entry",
781781
":optime", # TODO(SERVER-93876): Remove.
782782
"//src/mongo:base",
783+
"//src/mongo/db/storage:remove_saver",
783784
"//src/mongo/util:fail_point", # TODO(SERVER-93876): Remove.
784785
],
785786
)

src/mongo/db/repl/roll_back_local_operations.cpp

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
#include "mongo/bson/timestamp.h"
3939
#include "mongo/db/repl/oplog_entry.h"
4040
#include "mongo/db/repl/roll_back_local_operations.h"
41+
#include "mongo/db/storage/remove_saver.h"
4142
#include "mongo/logv2/log.h"
4243
#include "mongo/logv2/log_attr.h"
4344
#include "mongo/logv2/log_component.h"
@@ -76,6 +77,9 @@ long long getTerm(const OplogInterface::Iterator::Value& oplogValue) {
7677
}
7778
} // namespace
7879

80+
static constexpr auto kRollbackRemoveSaverType = "rollback";
81+
static constexpr auto kRollbackRemoveSaverWhy = "removed";
82+
7983
RollBackLocalOperations::RollBackLocalOperations(const OplogInterface& localOplog,
8084
const RollbackOperationFn& rollbackOperation)
8185

@@ -99,7 +103,7 @@ RollBackLocalOperations::RollbackCommonPoint::RollbackCommonPoint(BSONObj oplogB
99103
}
100104

101105
StatusWith<RollBackLocalOperations::RollbackCommonPoint> RollBackLocalOperations::onRemoteOperation(
102-
const BSONObj& operation) {
106+
const BSONObj& operation, RemoveSaver& removeSaver, bool shouldCreateDataFiles) {
103107
if (_scanned == 0) {
104108
auto result = _localOplogIterator->next();
105109
if (!result.isOK()) {
@@ -118,6 +122,9 @@ StatusWith<RollBackLocalOperations::RollbackCommonPoint> RollBackLocalOperations
118122
2,
119123
"Local oplog entry to roll back",
120124
"oplogEntry"_attr = redact(_localOplogValue.first));
125+
if (shouldCreateDataFiles) {
126+
fassert(9777500, removeSaver.goingToDelete(_localOplogValue.first));
127+
}
121128
auto status = _rollbackOperation(_localOplogValue.first);
122129
if (!status.isOK()) {
123130
invariant(ErrorCodes::NoSuchKey != status.code());
@@ -163,7 +170,8 @@ StatusWith<RollBackLocalOperations::RollbackCommonPoint> RollBackLocalOperations
163170
StatusWith<RollBackLocalOperations::RollbackCommonPoint> syncRollBackLocalOperations(
164171
const OplogInterface& localOplog,
165172
const OplogInterface& remoteOplog,
166-
const RollBackLocalOperations::RollbackOperationFn& rollbackOperation) {
173+
const RollBackLocalOperations::RollbackOperationFn& rollbackOperation,
174+
bool shouldCreateDataFiles) {
167175

168176
std::unique_ptr<OplogInterface::Iterator> remoteIterator;
169177

@@ -187,10 +195,12 @@ StatusWith<RollBackLocalOperations::RollbackCommonPoint> syncRollBackLocalOperat
187195

188196
RollBackLocalOperations finder(localOplog, rollbackOperation);
189197
Timestamp theirTime;
198+
RemoveSaver removeSaver(kRollbackRemoveSaverType, "local.oplog.rs", kRollbackRemoveSaverWhy);
190199
while (remoteResult.isOK()) {
191200
BSONObj theirObj = remoteResult.getValue().first;
192201
theirTime = theirObj["ts"].timestamp();
193-
auto result = finder.onRemoteOperation(theirObj);
202+
203+
auto result = finder.onRemoteOperation(theirObj, removeSaver, shouldCreateDataFiles);
194204
if (result.isOK()) {
195205
return result.getValue();
196206
} else if (result.getStatus().code() != ErrorCodes::NoSuchKey) {

src/mongo/db/repl/roll_back_local_operations.h

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
#include "mongo/db/record_id.h"
3838
#include "mongo/db/repl/oplog_interface.h"
3939
#include "mongo/db/repl/optime.h"
40+
#include "mongo/db/storage/remove_saver.h"
4041
#include "mongo/util/fail_point.h"
4142
#include "mongo/util/time_support.h"
4243

@@ -101,7 +102,9 @@ class RollBackLocalOperations {
101102
* Returns ErrorCodes::NoSuchKey if common point has not been found and
102103
* additional operations have to be read from the remote oplog.
103104
*/
104-
StatusWith<RollbackCommonPoint> onRemoteOperation(const BSONObj& operation);
105+
StatusWith<RollbackCommonPoint> onRemoteOperation(const BSONObj& operation,
106+
RemoveSaver& removeSaver,
107+
bool shouldCreateDataFiles);
105108

106109
private:
107110
std::unique_ptr<OplogInterface::Iterator> _localOplogIterator;
@@ -122,7 +125,8 @@ class RollBackLocalOperations {
122125
StatusWith<RollBackLocalOperations::RollbackCommonPoint> syncRollBackLocalOperations(
123126
const OplogInterface& localOplog,
124127
const OplogInterface& remoteOplog,
125-
const RollBackLocalOperations::RollbackOperationFn& rollbackOperation);
128+
const RollBackLocalOperations::RollbackOperationFn& rollbackOperation,
129+
bool shouldCreateDataFiles);
126130

127131
} // namespace repl
128132
} // namespace mongo

0 commit comments

Comments
 (0)