
Commit d771340

fix(mongodb-log-writer): issues with parallel mongosh executions MONGOSH-1914
1 parent 95d3673 commit d771340

2 files changed: +71 -35 lines changed
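Background for the change: several mongosh processes may run log cleanup against the same log directory at once, so an entry seen while scanning can already have been unlinked by another process and the scan then fails with an ENOENT error (see MONGOSH-1914 and the code comment added below). The commit wraps the scan in a try/catch and re-runs it once in that case. A minimal sketch of the same retry-on-ENOENT pattern, independent of the MongoLogManager internals (scanLogDirectory is a hypothetical stand-in for the real scan loop):

import { promises as fs } from 'fs';

// Hypothetical helper: scan a log directory once and let any filesystem
// error bubble up to the caller.
async function scanLogDirectory(dir: string): Promise<void> {
  const handle = await fs.opendir(dir);
  for await (const dirent of handle) {
    void dirent; // per-entry inspection/deletion would go here
  }
}

// Retry wrapper mirroring the shape of the fix: on ENOENT, run the scan
// again, at most `remainingRetries` more times (the commit defaults to 1).
async function scanWithRetry(dir: string, remainingRetries = 1): Promise<void> {
  try {
    await scanLogDirectory(dir);
  } catch (err: any) {
    if (err?.code === 'ENOENT' && remainingRetries > 0) {
      await scanWithRetry(dir, remainingRetries - 1);
    }
  }
}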


packages/mongodb-log-writer/src/mongo-log-manager.spec.ts

Lines changed: 25 additions & 0 deletions
@@ -25,7 +25,9 @@ describe('MongoLogManager', function () {
     );
     await fs.mkdir(directory, { recursive: true });
   });
+
   afterEach(async function () {
+    sinon.restore();
     await fs.rmdir(directory, { recursive: true });
   });

@@ -213,4 +215,27 @@ describe('MongoLogManager', function () {
     writer.end();
     await once(writer, 'finish');
   });
+
+  it('retries cleaning up old log files', async function () {
+    const fakeDirHandle = {
+      [Symbol.asyncIterator]: () => {
+        throw Object.assign(new Error('File not found'), { code: 'ENOENT' });
+      },
+      close: sinon.stub().resolves(),
+    };
+
+    const opendirStub = sinon.stub(fs, 'opendir').resolves(fakeDirHandle as any);
+    sinon.replace(fs, 'opendir', opendirStub);
+
+    retentionDays = 0.000001; // 86.4 ms
+    const manager = new MongoLogManager({
+      directory,
+      retentionDays,
+      onwarn,
+      onerror,
+    });
+
+    await manager.cleanupOldLogFiles();
+    expect(opendirStub).to.have.been.calledTwice;
+  });
 });
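A note on why the test expects exactly two calls: for await begins by invoking the handle's [Symbol.asyncIterator] method, so the fake handle makes the very first iteration attempt throw an error tagged with code 'ENOENT'. cleanupOldLogFiles catches it, retries once, and the stubbed fs.opendir is therefore hit a second time before the retry budget runs out. A small self-contained sketch of that mechanism (the names are illustrative, not part of the test):

const failingHandle = {
  [Symbol.asyncIterator]: () => {
    throw Object.assign(new Error('File not found'), { code: 'ENOENT' });
  },
};

async function consume(): Promise<string | undefined> {
  try {
    // The [Symbol.asyncIterator] lookup and call happen here, so the error
    // surfaces before a single entry is produced.
    for await (const entry of failingHandle as any) {
      void entry; // never reached
    }
    return 'iterated';
  } catch (err: any) {
    return err?.code; // resolves to 'ENOENT'
  }
}

In the production code the same error reaches the try/catch added in mongo-log-manager.ts below, which performs the single retry.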

packages/mongodb-log-writer/src/mongo-log-manager.ts

Lines changed: 46 additions & 35 deletions
@@ -36,7 +36,7 @@ export class MongoLogManager {
   }

   /** Clean up log files older than `retentionDays`. */
-  async cleanupOldLogFiles(maxDurationMs = 5_000): Promise<void> {
+  async cleanupOldLogFiles(maxDurationMs = 5_000, remainingRetries = 1): Promise<void> {
     const dir = this._options.directory;
     let dirHandle;
     try {
@@ -56,43 +56,54 @@ export class MongoLogManager {
       fullPath: string;
     }>((a, b) => a.fileTimestamp - b.fileTimestamp);

-    for await (const dirent of dirHandle) {
-      // Cap the overall time spent inside this function. Consider situations like
-      // a large number of machines using a shared network-mounted $HOME directory
-      // where lots and lots of log files end up and filesystem operations happen
-      // with network latency.
-      if (Date.now() - deletionStartTimestamp > maxDurationMs) break;
-
-      if (!dirent.isFile()) continue;
-      const { id } =
-        /^(?<id>[a-f0-9]{24})_log(\.gz)?$/i.exec(dirent.name)?.groups ?? {};
-      if (!id) continue;
-      const fileTimestamp = +new ObjectId(id).getTimestamp();
-      const fullPath = path.join(dir, dirent.name);
-      let toDelete: string | undefined;
-
-      // If the file is older than expected, delete it. If the file is recent,
-      // add it to the list of seen files, and if that list is too large, remove
-      // the least recent file we've seen so far.
-      if (fileTimestamp < deletionCutoffTimestamp) {
-        toDelete = fullPath;
-      } else if (this._options.maxLogFileCount) {
-        leastRecentFileHeap.push({ fullPath, fileTimestamp });
-        if (leastRecentFileHeap.size() > this._options.maxLogFileCount) {
-          toDelete = leastRecentFileHeap.pop()?.fullPath;
+    try {
+      for await (const dirent of dirHandle) {
+        // Cap the overall time spent inside this function. Consider situations like
+        // a large number of machines using a shared network-mounted $HOME directory
+        // where lots and lots of log files end up and filesystem operations happen
+        // with network latency.
+        if (Date.now() - deletionStartTimestamp > maxDurationMs) break;
+
+        if (!dirent.isFile()) continue;
+        const { id } =
+          /^(?<id>[a-f0-9]{24})_log(\.gz)?$/i.exec(dirent.name)?.groups ?? {};
+        if (!id) continue;
+        const fileTimestamp = +new ObjectId(id).getTimestamp();
+        const fullPath = path.join(dir, dirent.name);
+        let toDelete: string | undefined;
+
+        // If the file is older than expected, delete it. If the file is recent,
+        // add it to the list of seen files, and if that list is too large, remove
+        // the least recent file we've seen so far.
+        if (fileTimestamp < deletionCutoffTimestamp) {
+          toDelete = fullPath;
+        } else if (this._options.maxLogFileCount) {
+          leastRecentFileHeap.push({ fullPath, fileTimestamp });
+          if (leastRecentFileHeap.size() > this._options.maxLogFileCount) {
+            toDelete = leastRecentFileHeap.pop()?.fullPath;
+          }
         }
-      }
-
-      if (!toDelete) continue;
-      try {
-        await fs.unlink(toDelete);
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      } catch (err: any) {
-        if (err?.code !== 'ENOENT') {
-          // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
-          this._options.onerror(err, fullPath);
+
+        if (!toDelete) continue;
+        try {
+          await fs.unlink(toDelete);
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        } catch (err: any) {
+          if (err.code !== 'ENOENT') {
+            // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+            this._options.onerror(err, fullPath);
+          }
         }
       }
+    } catch (statErr: any) {
+      // Multiple processes may attempt to clean up log files in parallel.
+      // A situation can arise where one process tries to read a file
+      // that another process has already unlinked (see MONGOSH-1914).
+      // To handle such scenarios, we will catch lstat errors and retry cleaning up
+      // to let different processes reach out to different log files.
+      if (statErr.code === 'ENOENT' && remainingRetries > 0) {
+        await this.cleanupOldLogFiles(maxDurationMs, remainingRetries - 1);
+      }
     }
   }

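For completeness, a sketch of how a caller sees the method after this change; the option values are illustrative and the callbacks mirror what the test above passes in (onwarn, onerror):

const manager = new MongoLogManager({
  directory: '/tmp/mongosh-logs', // hypothetical path
  retentionDays: 30,
  onwarn: console.warn,
  onerror: console.error,
});

// Both parameters are optional and shown here at their defaults: a 5-second
// cap on the directory scan and a single retry after an ENOENT failure.
await manager.cleanupOldLogFiles(5_000, 1);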
0 commit comments
