Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 44 additions & 14 deletions packages/mongodb-log-writer/src/mongo-log-manager.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,19 @@ describe('MongoLogManager', function () {
}
});

const getFilesState = async (paths: string[]) => {
return (
await Promise.all(
paths.map((path) =>
fs.stat(path).then(
() => 1,
() => 0
)
)
)
).join('');
};

it('cleans up least recent log files when requested', async function () {
const manager = new MongoLogManager({
directory,
Expand All @@ -106,21 +119,38 @@ describe('MongoLogManager', function () {
paths.unshift(filename);
}

const getFiles = async () => {
return (
await Promise.all(
paths.map((path) =>
fs.stat(path).then(
() => 1,
() => 0
)
)
)
).join('');
};
expect(await getFiles()).to.equal('1111111111');
expect(await getFilesState(paths)).to.equal('1111111111');
await manager.cleanupOldLogFiles();
expect(await getFilesState(paths)).to.equal('0000011111');
});

it('cleans up least recent log files when requested with a storage limit', async function () {
const manager = new MongoLogManager({
directory,
retentionDays,
maxLogFileCount: 1000,
// 6 KB
retentionGB: 6 / 1024 / 1024,
onwarn,
onerror,
});

const paths: string[] = [];
const offset = Math.floor(Date.now() / 1000);

// Create 10 files of 1 KB each.
for (let i = 0; i < 10; i++) {
const filename = path.join(
directory,
ObjectId.createFromTime(offset - i).toHexString() + '_log'
);
await fs.writeFile(filename, '0'.repeat(1024));
paths.unshift(filename);
}

expect(await getFilesState(paths)).to.equal('1111111111');
await manager.cleanupOldLogFiles();
expect(await getFiles()).to.equal('0000011111');
expect(await getFilesState(paths)).to.equal('0000111111');
});

it('cleaning up old log files is a no-op by default', async function () {
Expand Down
49 changes: 42 additions & 7 deletions packages/mongodb-log-writer/src/mongo-log-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ interface MongoLogOptions {
retentionDays: number;
/** The maximal number of log files which are kept. */
maxLogFileCount?: number;
/** The maximal GB of log files which are kept. */
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/** The maximal GB of log files which are kept. */
/** The maximal size of log files which are kept. */

retentionGB?: number;
/** A handler for warnings related to a specific filesystem path. */
onerror: (err: Error, path: string) => unknown | Promise<void>;
/** A handler for errors related to a specific filesystem path. */
Expand Down Expand Up @@ -54,8 +56,14 @@ export class MongoLogManager {
const leastRecentFileHeap = new Heap<{
fileTimestamp: number;
fullPath: string;
fileSize?: number;
}>((a, b) => a.fileTimestamp - b.fileTimestamp);

const storageSizeLimit = this._options.retentionGB
? this._options.retentionGB * 1024 * 1024 * 1024
: Infinity;
let usedStorageSize = this._options.retentionGB ? 0 : -Infinity;

for await (const dirent of dirHandle) {
// Cap the overall time spent inside this function. Consider situations like
// a large number of machines using a shared network-mounted $HOME directory
Expand All @@ -69,23 +77,50 @@ export class MongoLogManager {
if (!id) continue;
const fileTimestamp = +new ObjectId(id).getTimestamp();
const fullPath = path.join(dir, dirent.name);
let toDelete: string | undefined;
let toDelete:
| {
fullPath: string;
/** If the file wasn't deleted right away and there is a
* retention size limit, its size should be accounted */
fileSize?: number;
}
| undefined;

// If the file is older than expected, delete it. If the file is recent,
// add it to the list of seen files, and if that list is too large, remove
// the least recent file we've seen so far.
if (fileTimestamp < deletionCutoffTimestamp) {
toDelete = fullPath;
} else if (this._options.maxLogFileCount) {
leastRecentFileHeap.push({ fullPath, fileTimestamp });
if (leastRecentFileHeap.size() > this._options.maxLogFileCount) {
toDelete = leastRecentFileHeap.pop()?.fullPath;
toDelete = {
fullPath,
};
} else if (this._options.retentionGB || this._options.maxLogFileCount) {
const fileSize = (await fs.stat(fullPath)).size;
if (this._options.retentionGB) {
usedStorageSize += fileSize;
}

leastRecentFileHeap.push({
fullPath,
fileTimestamp,
fileSize,
});

const reachedMaxStorageSize = usedStorageSize > storageSizeLimit;
const reachedMaxFileCount =
this._options.maxLogFileCount &&
leastRecentFileHeap.size() > this._options.maxLogFileCount;

if (reachedMaxStorageSize || reachedMaxFileCount) {
toDelete = leastRecentFileHeap.pop();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So ... took some time to think this through, but the problem with this approach is that we'll generally want to delete the oldest files that are necessary to ensure that max file count + max file size limitations

But with this approach, that's no longer the case – you could end up with e.g. the most recent file being deleted, if it happens to be the first file we read from the directory and it's already over the size limit – right?

(I'm not sure if we need to let go of the heap as the underlying data structure, certainly wouldn't be the end of the world, but I'm pretty sure that the "read one file, check conditions, maybe delete one file" approach won't work anymore)

Copy link
Collaborator Author

@gagik gagik Feb 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm yeah, we could get rid of the heap, sort by creation date (which would be same as the log's name?) from the getgo and adjust the logic accordingly

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

which would be same as the log's name?

Yes, that's an assumption I'd still keep around 👍

}
}

if (!toDelete) continue;
try {
await fs.unlink(toDelete);
await fs.unlink(toDelete.fullPath);
if (toDelete.fileSize) {
usedStorageSize -= toDelete.fileSize;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (err: any) {
if (err?.code !== 'ENOENT') {
Expand Down
Loading