Commit 99afade

refactor: use a heap-based second run approach
Includes tests for a random OS-based file order as well as for mixing different retention settings together.
1 parent b008854 · commit 99afade
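
In outline, the new approach works like this (a minimal sketch for illustration only; the SeenFile type and trackFile helper are hypothetical, while Heap, push, pop, and size() come from heap-js as used in the diff below): during a single streaming pass over the directory, a min-heap ordered by file timestamp retains the most recent files seen so far; whenever a count cap is exceeded, the oldest entry is popped and deleted, and a second run over the surviving heap entries then enforces the size budget.

import { Heap } from 'heap-js';

// Hypothetical shape for illustration; the real code inlines this type.
interface SeenFile {
  fileTimestamp: number;
  fullPath: string;
  fileSize: number | undefined;
}

// Min-heap by timestamp: pop() always yields the oldest tracked file.
const leastRecentFileHeap = new Heap<SeenFile>(
  (a, b) => a.fileTimestamp - b.fileTimestamp
);

const maxLogFileCount = 3; // example cap

// First run (while streaming the directory): keep at most N entries in the
// heap; whatever gets evicted is a deletion candidate.
function trackFile(file: SeenFile): SeenFile | undefined {
  leastRecentFileHeap.push(file);
  if (leastRecentFileHeap.size() > maxLogFileCount) {
    return leastRecentFileHeap.pop(); // oldest file seen so far
  }
  return undefined;
}

// Second run: the entries still in the heap are walked oldest-first to enforce
// the retentionGB byte budget, as shown in mongo-log-manager.ts below.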

2 files changed: +235 -65 lines changed

packages/mongodb-log-writer/src/mongo-log-manager.spec.ts

Lines changed: 168 additions & 2 deletions
@@ -1,7 +1,7 @@
 import { MongoLogManager, mongoLogId } from '.';
 import { ObjectId } from 'bson';
 import { once } from 'events';
-import type { Stats } from 'fs';
+import type { Stats, Dir } from 'fs';
 import { promises as fs } from 'fs';
 import path from 'path';
 import os from 'os';
@@ -178,7 +178,7 @@ describe('MongoLogManager', function () {
     expect(leftoverFiles).deep.equals([faultyFile, ...validFiles.slice(3)]);
   });
 
-  it('cleans up least recent log files when requested with a storage limit', async function () {
+  it('cleans up least recent log files when over a storage limit', async function () {
     const manager = new MongoLogManager({
       directory,
       retentionDays,
@@ -207,6 +207,172 @@ describe('MongoLogManager', function () {
     expect(await getFilesState(paths)).to.equal('0000111111');
   });
 
+  describe('with a random file order', function () {
+    let paths: string[] = [];
+    const times = [92, 90, 1, 2, 3, 91];
+
+    beforeEach(async function () {
+      const fileNames: string[] = [];
+      paths = [];
+      const offset = Math.floor(Date.now() / 1000);
+
+      for (const time of times) {
+        const fileName =
+          ObjectId.createFromTime(offset - time).toHexString() + '_log';
+        const fullPath = path.join(directory, fileName);
+        await fs.writeFile(fullPath, '0'.repeat(1024));
+        fileNames.push(fileName);
+        paths.push(fullPath);
+      }
+
+      sinon.replace(fs, 'opendir', async () =>
+        Promise.resolve({
+          [Symbol.asyncIterator]: function* () {
+            for (const fileName of fileNames) {
+              yield {
+                name: fileName,
+                isFile: () => true,
+              };
+            }
+          },
+        } as unknown as Dir)
+      );
+    });
+
+    it('cleans up in the expected order with maxLogFileCount', async function () {
+      const manager = new MongoLogManager({
+        directory,
+        retentionDays,
+        maxLogFileCount: 3,
+        onwarn,
+        onerror,
+      });
+
+      expect(await getFilesState(paths)).to.equal('111111');
+
+      await manager.cleanupOldLogFiles();
+
+      expect(await getFilesState(paths)).to.equal('001110');
+    });
+
+    it('cleans up in the expected order with retentionGB', async function () {
+      const manager = new MongoLogManager({
+        directory,
+        retentionDays,
+        retentionGB: 3 / 1024 / 1024,
+        onwarn,
+        onerror,
+      });
+
+      expect(await getFilesState(paths)).to.equal('111111');
+
+      await manager.cleanupOldLogFiles();
+
+      expect(await getFilesState(paths)).to.equal('001110');
+    });
+  });
+
+  describe('with multiple log retention settings', function () {
+    it('with retention days, file count, and max size maintains all conditions', async function () {
+      const manager = new MongoLogManager({
+        directory,
+        retentionDays: 1,
+        maxLogFileCount: 3,
+        retentionGB: 2 / 1024 / 1024,
+        onwarn,
+        onerror,
+      });
+
+      const paths: string[] = [];
+
+      // Create 4 files which are all older than 1 day and 4 which are from today.
+      for (let i = 0; i < 4; i++) {
+        const today = Math.floor(Date.now() / 1000);
+        const yesterday = today - 25 * 60 * 60;
+        const todayFile = path.join(
+          directory,
+          ObjectId.createFromTime(today - i).toHexString() + '_log'
+        );
+        await fs.writeFile(todayFile, '0'.repeat(1024));
+
+        const yesterdayFile = path.join(
+          directory,
+          ObjectId.createFromTime(yesterday - i).toHexString() + '_log'
+        );
+        await fs.writeFile(yesterdayFile, '0'.repeat(1024));
+
+        paths.unshift(todayFile);
+        paths.unshift(yesterdayFile);
+      }
+
+      expect(await getFilesState(paths)).to.equal('11111111');
+
+      await manager.cleanupOldLogFiles();
+
+      // All of yesterday's files and 2 of today's files should be deleted
+      // (because of the file count and file size limits).
+      expect(await getFilesState(paths)).to.equal('00000101');
+    });
+
+    it('with low GB but high file count maintains both conditions', async function () {
+      const manager = new MongoLogManager({
+        directory,
+        retentionDays,
+        maxLogFileCount: 3,
+        // 2 KB, so 2 files
+        retentionGB: 2 / 1024 / 1024,
+        onwarn,
+        onerror,
+      });
+
+      const paths: string[] = [];
+      const offset = Math.floor(Date.now() / 1000);
+
+      // Create 10 files of 1 KB each.
+      for (let i = 0; i < 10; i++) {
+        const filename = path.join(
+          directory,
+          ObjectId.createFromTime(offset - i).toHexString() + '_log'
+        );
+        await fs.writeFile(filename, '0'.repeat(1024));
+        paths.unshift(filename);
+      }
+
+      expect(await getFilesState(paths)).to.equal('1111111111');
+      await manager.cleanupOldLogFiles();
+      expect(await getFilesState(paths)).to.equal('0000000011');
+    });
+
+    it('with high GB but low file count maintains both conditions', async function () {
+      const manager = new MongoLogManager({
+        directory,
+        retentionDays,
+        maxLogFileCount: 2,
+        // 3 KB, so 3 files
+        retentionGB: 3 / 1024 / 1024,
+        onwarn,
+        onerror,
+      });
+
+      const paths: string[] = [];
+      const offset = Math.floor(Date.now() / 1000);
+
+      // Create 10 files of 1 KB each.
+      for (let i = 0; i < 10; i++) {
+        const filename = path.join(
+          directory,
+          ObjectId.createFromTime(offset - i).toHexString() + '_log'
+        );
+        await fs.writeFile(filename, '0'.repeat(1024));
+        paths.unshift(filename);
+      }
+
+      expect(await getFilesState(paths)).to.equal('1111111111');
+      await manager.cleanupOldLogFiles();
+      expect(await getFilesState(paths)).to.equal('0000000011');
+    });
+  });
+
   it('cleaning up old log files is a no-op by default', async function () {
     const manager = new MongoLogManager({
       directory: path.join('directory', 'nonexistent'),
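
A quick note on the retentionGB arithmetic used in the tests above (assuming the limit is applied as GB × 1024³ bytes, as the implementation below does):

// retentionGB values from the tests, converted back to bytes:
const twoKbBudget = (2 / 1024 / 1024) * 1024 * 1024 * 1024; // 2048 bytes -> two 1 KB files
const threeKbBudget = (3 / 1024 / 1024) * 1024 * 1024 * 1024; // 3072 bytes -> three 1 KB files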

packages/mongodb-log-writer/src/mongo-log-manager.ts

Lines changed: 67 additions & 63 deletions
@@ -3,6 +3,7 @@ import { ObjectId } from 'bson';
 import { once } from 'events';
 import { createWriteStream, promises as fs } from 'fs';
 import { createGzip, constants as zlibConstants } from 'zlib';
+import { Heap } from 'heap-js';
 import { MongoLogWriter } from './mongo-log-writer';
 import { Writable } from 'stream';
 
@@ -36,40 +37,23 @@ export class MongoLogManager {
     this._options = options;
   }
 
+  private async deleteFile(path: string) {
+    try {
+      await fs.unlink(path);
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    } catch (err: any) {
+      if (err?.code !== 'ENOENT') {
+        this._options.onerror(err as Error, path);
+      }
+    }
+  }
+
   /** Clean up log files older than `retentionDays`. */
   async cleanupOldLogFiles(maxDurationMs = 5_000): Promise<void> {
     const dir = this._options.directory;
-    const sortedLogFiles: {
-      fullPath: string;
-      id: string;
-      size?: number;
-    }[] = [];
-    let usedStorageSize = this._options.retentionGB ? 0 : -Infinity;
-
+    let dirHandle;
     try {
-      const files = await fs.readdir(dir, { withFileTypes: true });
-      for (const file of files) {
-        const { id } =
-          /^(?<id>[a-f0-9]{24})_log(\.gz)?$/i.exec(file.name)?.groups ?? {};
-
-        if (!file.isFile() || !id) {
-          continue;
-        }
-
-        const fullPath = path.join(dir, file.name);
-        let size: number | undefined;
-        if (this._options.retentionGB) {
-          try {
-            size = (await fs.stat(fullPath)).size;
-            usedStorageSize += size;
-          } catch (err) {
-            this._options.onerror(err as Error, fullPath);
-            continue;
-          }
-        }
-
-        sortedLogFiles.push({ fullPath, id, size });
-      }
+      dirHandle = await fs.opendir(dir);
     } catch {
       return;
     }
@@ -78,58 +62,78 @@ export class MongoLogManager {
     // Delete files older than N days
     const deletionCutoffTimestamp =
       deletionStartTimestamp - this._options.retentionDays * 86400 * 1000;
+    // Store the known set of least recent files in a heap in order to be able to
+    // delete all but the most recent N files.
+    const leastRecentFileHeap = new Heap<{
+      fileTimestamp: number;
+      fullPath: string;
+      fileSize: number | undefined;
+    }>((a, b) => a.fileTimestamp - b.fileTimestamp);
 
-    const storageSizeLimit = this._options.retentionGB
-      ? this._options.retentionGB * 1024 * 1024 * 1024
-      : Infinity;
+    let usedStorageSize = this._options.retentionGB ? 0 : -Infinity;
 
-    for await (const { id, fullPath } of [...sortedLogFiles]) {
+    for await (const dirent of dirHandle) {
       // Cap the overall time spent inside this function. Consider situations like
       // a large number of machines using a shared network-mounted $HOME directory
       // where lots and lots of log files end up and filesystem operations happen
       // with network latency.
       if (Date.now() - deletionStartTimestamp > maxDurationMs) break;
 
+      if (!dirent.isFile()) continue;
+      const { id } =
+        /^(?<id>[a-f0-9]{24})_log(\.gz)?$/i.exec(dirent.name)?.groups ?? {};
+      if (!id) continue;
+
       const fileTimestamp = +new ObjectId(id).getTimestamp();
-      let toDelete:
-        | {
-            fullPath: string;
-            /** If the file wasn't deleted right away and there is a
-             * retention size limit, its size should be accounted */
-            fileSize?: number;
-          }
-        | undefined;
+      const fullPath = path.join(dir, dirent.name);
 
       // If the file is older than expected, delete it. If the file is recent,
       // add it to the list of seen files, and if that list is too large, remove
       // the least recent file we've seen so far.
       if (fileTimestamp < deletionCutoffTimestamp) {
-        toDelete = {
-          fullPath,
-        };
-      } else if (this._options.retentionGB || this._options.maxLogFileCount) {
-        const reachedMaxStorageSize = usedStorageSize > storageSizeLimit;
-        const reachedMaxFileCount =
-          this._options.maxLogFileCount &&
-          sortedLogFiles.length > this._options.maxLogFileCount;
-
-        if (reachedMaxStorageSize || reachedMaxFileCount) {
-          toDelete = sortedLogFiles.shift();
-        }
+        await this.deleteFile(fullPath);
+        continue;
       }
 
-      if (!toDelete) continue;
-      try {
-        await fs.unlink(toDelete.fullPath);
-        if (toDelete.fileSize) {
-          usedStorageSize -= toDelete.fileSize;
-        }
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      } catch (err: any) {
-        if (err?.code !== 'ENOENT') {
+      let fileSize: number | undefined;
+      if (this._options.retentionGB) {
+        try {
+          fileSize = (await fs.stat(fullPath)).size;
+          usedStorageSize += fileSize;
+        } catch (err) {
           this._options.onerror(err as Error, fullPath);
+          continue;
         }
       }
+
+      if (this._options.maxLogFileCount || this._options.retentionGB) {
+        leastRecentFileHeap.push({ fullPath, fileTimestamp, fileSize });
+      }
+
+      if (
+        this._options.maxLogFileCount &&
+        leastRecentFileHeap.size() > this._options.maxLogFileCount
+      ) {
+        const toDelete = leastRecentFileHeap.pop();
+        if (!toDelete) continue;
+        await this.deleteFile(toDelete.fullPath);
+        usedStorageSize -= toDelete.fileSize ?? 0;
+      }
+    }
+
+    if (this._options.retentionGB) {
+      const storageSizeLimit = this._options.retentionGB * 1024 * 1024 * 1024;
+
+      for (const file of leastRecentFileHeap) {
+        if (Date.now() - deletionStartTimestamp > maxDurationMs) break;
+
+        if (usedStorageSize <= storageSizeLimit) break;
+
+        if (!file.fileSize) continue;
+
+        await this.deleteFile(file.fullPath);
+        usedStorageSize -= file.fileSize;
+      }
     }
   }
 
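
For context, a minimal usage sketch of the API these changes affect (assumptions: the package is consumed as mongodb-log-writer, and the directory path and callback bodies here are placeholders; the option names mirror those exercised in the tests above):

import { MongoLogManager } from 'mongodb-log-writer';

async function cleanup(): Promise<void> {
  const manager = new MongoLogManager({
    directory: '/var/log/my-app', // hypothetical log directory
    retentionDays: 30,            // delete files older than 30 days
    maxLogFileCount: 100,         // keep at most 100 log files
    retentionGB: 0.5,             // and at most ~0.5 GB of logs
    onwarn: (warn) => console.warn(warn),
    onerror: (err, filepath) => console.error(err, filepath),
  });

  // Runs the heap-based cleanup; overall work is time-capped (5 seconds by default).
  await manager.cleanupOldLogFiles();
}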
