@@ -62,7 +62,12 @@ export class MongoLogManager {
62
62
}
63
63
64
64
/** Clean up log files older than `retentionDays`. */
65
- async cleanupOldLogFiles ( maxDurationMs = 5_000 ) : Promise < void > {
65
+ async cleanupOldLogFiles ( maxDurationMs = 5_000 , remainingRetries = 1 ) : Promise < void > {
66
+ const deletionStartTimestamp = Date . now ( ) ;
67
+ // Delete files older than N days
68
+ const deletionCutoffTimestamp =
69
+ deletionStartTimestamp - this . _options . retentionDays * 86400 * 1000 ;
70
+
66
71
const dir = this . _options . directory ;
67
72
let dirHandle ;
68
73
try {
@@ -71,10 +76,6 @@ export class MongoLogManager {
71
76
return ;
72
77
}
73
78
74
- const deletionStartTimestamp = Date . now ( ) ;
75
- // Delete files older than N days
76
- const deletionCutoffTimestamp =
77
- deletionStartTimestamp - this . _options . retentionDays * 86400 * 1000 ;
78
79
// Store the known set of least recent files in a heap in order to be able to
79
80
// delete all but the most recent N files.
80
81
const leastRecentFileHeap = new Heap < {
@@ -85,55 +86,66 @@ export class MongoLogManager {
85
86
86
87
let usedStorageSize = this . _options . retentionGB ? 0 : - Infinity ;
87
88
88
- for await ( const dirent of dirHandle ) {
89
- // Cap the overall time spent inside this function. Consider situations like
90
- // a large number of machines using a shared network-mounted $HOME directory
91
- // where lots and lots of log files end up and filesystem operations happen
92
- // with network latency.
93
- if ( Date . now ( ) - deletionStartTimestamp > maxDurationMs ) break ;
94
-
95
- if ( ! dirent . isFile ( ) ) continue ;
96
- const logRegExp = new RegExp (
97
- `^${ this . prefix } (?<id>[a-f0-9]{24})_log(\\.gz)?$` ,
98
- 'i'
99
- ) ;
100
- const { id } = logRegExp . exec ( dirent . name ) ?. groups ?? { } ;
101
- if ( ! id ) continue ;
102
-
103
- const fileTimestamp = + new ObjectId ( id ) . getTimestamp ( ) ;
104
- const fullPath = path . join ( dir , dirent . name ) ;
105
-
106
- // If the file is older than expected, delete it. If the file is recent,
107
- // add it to the list of seen files, and if that list is too large, remove
108
- // the least recent file we've seen so far.
109
- if ( fileTimestamp < deletionCutoffTimestamp ) {
110
- await this . deleteFile ( fullPath ) ;
111
- continue ;
112
- }
113
-
114
- let fileSize : number | undefined ;
115
- if ( this . _options . retentionGB ) {
116
- try {
117
- fileSize = ( await fs . stat ( fullPath ) ) . size ;
118
- usedStorageSize += fileSize ;
119
- } catch ( err ) {
120
- this . _options . onerror ( err as Error , fullPath ) ;
89
+ try {
90
+ for await ( const dirent of dirHandle ) {
91
+ // Cap the overall time spent inside this function. Consider situations like
92
+ // a large number of machines using a shared network-mounted $HOME directory
93
+ // where lots and lots of log files end up and filesystem operations happen
94
+ // with network latency.
95
+ if ( Date . now ( ) - deletionStartTimestamp > maxDurationMs ) break ;
96
+
97
+ if ( ! dirent . isFile ( ) ) continue ;
98
+ const logRegExp = new RegExp (
99
+ `^${ this . prefix } (?<id>[a-f0-9]{24})_log(\\.gz)?$` ,
100
+ 'i'
101
+ ) ;
102
+ const { id } = logRegExp . exec ( dirent . name ) ?. groups ?? { } ;
103
+ if ( ! id ) continue ;
104
+
105
+ const fileTimestamp = + new ObjectId ( id ) . getTimestamp ( ) ;
106
+ const fullPath = path . join ( dir , dirent . name ) ;
107
+
108
+ // If the file is older than expected, delete it. If the file is recent,
109
+ // add it to the list of seen files, and if that list is too large, remove
110
+ // the least recent file we've seen so far.
111
+ if ( fileTimestamp < deletionCutoffTimestamp ) {
112
+ await this . deleteFile ( fullPath ) ;
121
113
continue ;
122
114
}
115
+
116
+ let fileSize : number | undefined ;
117
+ if ( this . _options . retentionGB ) {
118
+ try {
119
+ fileSize = ( await fs . stat ( fullPath ) ) . size ;
120
+ usedStorageSize += fileSize ;
121
+ } catch ( err ) {
122
+ this . _options . onerror ( err as Error , fullPath ) ;
123
+ continue ;
124
+ }
125
+ }
126
+
127
+ if ( this . _options . maxLogFileCount || this . _options . retentionGB ) {
128
+ leastRecentFileHeap . push ( { fullPath, fileTimestamp, fileSize } ) ;
129
+ }
130
+
131
+ if (
132
+ this . _options . maxLogFileCount &&
133
+ leastRecentFileHeap . size ( ) > this . _options . maxLogFileCount
134
+ ) {
135
+ const toDelete = leastRecentFileHeap . pop ( ) ;
136
+ if ( ! toDelete ) continue ;
137
+ await this . deleteFile ( toDelete . fullPath ) ;
138
+ usedStorageSize -= toDelete . fileSize ?? 0 ;
139
+ }
123
140
}
124
-
125
- if ( this . _options . maxLogFileCount || this . _options . retentionGB ) {
126
- leastRecentFileHeap . push ( { fullPath, fileTimestamp, fileSize } ) ;
127
- }
128
-
129
- if (
130
- this . _options . maxLogFileCount &&
131
- leastRecentFileHeap . size ( ) > this . _options . maxLogFileCount
132
- ) {
133
- const toDelete = leastRecentFileHeap . pop ( ) ;
134
- if ( ! toDelete ) continue ;
135
- await this . deleteFile ( toDelete . fullPath ) ;
136
- usedStorageSize -= toDelete . fileSize ?? 0 ;
141
+ } catch ( statErr : any ) {
142
+ // Multiple processes may attempt to clean up log files in parallel.
143
+ // A situation can arise where one process tries to read a file
144
+ // that another process has already unlinked (see MONGOSH-1914).
145
+ // To handle such scenarios, we catch lstat errors and retry the cleanup (while retries remain)
146
+ // so that different processes end up working on different log files.
147
+ if ( statErr . code === 'ENOENT' && remainingRetries > 0 ) {
148
+ await this . cleanupOldLogFiles ( maxDurationMs - ( Date . now ( ) - deletionStartTimestamp ) , remainingRetries - 1 ) ;
137
149
}
138
150
}
139
151
0 commit comments