Skip to content

Commit 5a385bb

Browse files
committed
SERVER-41278 FSM dbhash background check shouldn't use sessions outside of error retry loop
1 parent b8602c0 commit 5a385bb

File tree

1 file changed

+37
-19
lines changed

1 file changed

+37
-19
lines changed

jstests/hooks/run_check_repl_dbhash_background.js

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ if (typeof db === 'undefined') {
3131
TestData = TestData || {};
3232
TestData.traceExceptions = false;
3333

34+
// Disable implicit sessions so FSM workloads that kill random sessions won't interrupt the
35+
// operations in this test that aren't resilient to interruptions.
36+
TestData.disableImplicitSessions = true;
37+
3438
const conn = db.getMongo();
3539
const topology = DiscoverTopology.findConnectedNodes(conn);
3640

@@ -78,35 +82,40 @@ function checkReplDbhashBackgroundThread(hosts) {
7882
// We enable the "WTPreserveSnapshotHistoryIndefinitely" failpoint to ensure that the same
7983
// snapshot will be available to read at on the primary and secondaries.
8084
for (let session of sessions) {
81-
const db = session.getDatabase('admin');
85+
// Use the session's client directly so FSM workloads that kill random sessions won't
86+
// interrupt these operations.
87+
const dbNoSession = session.getClient().getDB('admin');
8288

83-
let preserveRes = assert.commandWorked(db.runCommand({
89+
let preserveRes = assert.commandWorked(dbNoSession.runCommand({
8490
configureFailPoint: 'WTPreserveSnapshotHistoryIndefinitely',
8591
mode: 'alwaysOn',
8692
}),
8793
debugInfo);
8894
debugInfo.push({
89-
"node": db.getMongo(),
95+
"node": dbNoSession.getMongo(),
9096
"session": session,
9197
"preserveFailPointOpTime": preserveRes['operationTime']
9298
});
9399

94100
resetFns.push(() => {
95-
assert.commandWorked(db.runCommand({
101+
assert.commandWorked(dbNoSession.runCommand({
96102
configureFailPoint: 'WTPreserveSnapshotHistoryIndefinitely',
97103
mode: 'off',
98104
}));
99105
});
100106
}
101107

102108
for (let session of sessions) {
103-
const db = session.getDatabase('admin');
104-
const res = assert.commandWorked(db.runCommand({listDatabases: 1, nameOnly: true}));
109+
// Use the session's client directly so FSM workloads that kill random sessions won't
110+
// interrupt these operations.
111+
const dbNoSession = session.getClient().getDB('admin');
112+
const res =
113+
assert.commandWorked(dbNoSession.runCommand({listDatabases: 1, nameOnly: true}));
105114
for (let dbInfo of res.databases) {
106115
dbNames.add(dbInfo.name);
107116
}
108117
debugInfo.push({
109-
"node": db.getMongo(),
118+
"node": dbNoSession.getMongo(),
110119
"session": session,
111120
"listDatabaseOpTime": res['operationTime']
112121
});
@@ -252,6 +261,13 @@ function checkReplDbhashBackgroundThread(hosts) {
252261
return result;
253262
};
254263

264+
// Outside of checkCollectionHashesForDB(), operations in this function are not resilient to
265+
// their session being killed by a concurrent FSM workload, so the driver sessions started above
266+
// have not been used and will contain null logical time values. The process for selecting
267+
// a read timestamp below assumes each session has valid logical times, so run a dummy command
268+
// through each session to populate its logical times.
269+
sessions.forEach(session => session.getDatabase('admin').runCommand({ping: 1}));
270+
255271
for (let dbName of dbNames) {
256272
let result;
257273
let clusterTime;
@@ -310,32 +326,34 @@ function checkReplDbhashBackgroundThread(hosts) {
310326
signedClusterTime = sess.getClusterTime();
311327
}
312328
}
313-
waitForSecondaries(clusterTime, signedClusterTime);
314-
315-
for (let session of sessions) {
316-
debugInfo.push({
317-
"node": session.getClient(),
318-
"session": session,
319-
"readAtClusterTime": clusterTime
320-
});
321-
}
322329

323330
hasTransientError = false;
324331
performNoopWrite = false;
325332

326333
try {
334+
waitForSecondaries(clusterTime, signedClusterTime);
335+
336+
for (let session of sessions) {
337+
debugInfo.push({
338+
"node": session.getClient(),
339+
"session": session,
340+
"readAtClusterTime": clusterTime
341+
});
342+
}
343+
327344
result = checkCollectionHashesForDB(dbName, clusterTime);
328345
} catch (e) {
329346
if (isTransientError(e)) {
330347
if (performNoopWrite) {
331-
const primarySession = sessions[0];
348+
// Use the session's client directly so FSM workloads that kill random
349+
// sessions won't interrupt appendOplogNote.
350+
const primaryConn = sessions[0].getClient();
332351

333352
// If the no-op write fails due to the global lock not being able to be
334353
// acquired within 1 millisecond, retry the operation again at a later
335354
// time.
336355
assert.commandWorkedOrFailedWithCode(
337-
primarySession.getDatabase(dbName).adminCommand(
338-
{appendOplogNote: 1, data: {}}),
356+
primaryConn.adminCommand({appendOplogNote: 1, data: {}}),
339357
ErrorCodes.LockFailed);
340358
}
341359

0 commit comments

Comments
 (0)