@@ -76,10 +76,18 @@ final class ExportedApi {
7676 /// This will remove prefixes other than `latest/` where [shouldGCVersion]
7777 /// returns true.
Future<void> garbageCollect(Set<String> allPackageNames) async {
  _log.info(
    'Garbage collection started, with ${allPackageNames.length} package names',
  );

  // Phase 1: start the clean-up of old prefixes and of every current prefix,
  // then wait for all of them to finish.
  final gcTasks = [
    _gcOldPrefixes(),
    for (final prefix in _prefixes) _gcPrefix(prefix, allPackageNames),
  ];
  await Future.wait(gcTasks);

  // Phase 2: only after a full GC cycle has finished, look for stray files
  // that are still present under each current prefix.
  final strayFileScans = [
    for (final prefix in _prefixes) _findStrayFiles(prefix),
  ];
  await Future.wait(strayFileScans);

  _log.info('Garbage collection completed.');
}
8492
8593 /// Garbage collect unknown packages from [prefix] .
@@ -175,6 +183,41 @@ final class ExportedApi {
175183 }));
176184 }
177185
/// Scan all objects under [prefix] and alert about stray files.
///
/// Stray files are detected by looking at the [_validatedCustomHeader]
/// meta-data. Whenever we save a file we refresh the [_validatedCustomHeader]
/// timestamp if it is older than [_updateValidatedAfter]. Thus, if the
/// timestamp of an object hasn't been refreshed for
/// [_unvalidatedStrayFileAfter], the object is probably a stray file that we
/// don't understand.
///
/// Stray files are only reported, never deleted. They could have been
/// introduced by a newer [runtimeVersion], or they could be the result of a
/// bug, in which case the implications of deleting them are unclear. In all
/// such cases it is best to alert, and to treat the clean-up of such files as
/// a bug to be fixed.
Future<void> _findStrayFiles(String prefix) async {
  final validatedDeadline = clock.agoBy(_unvalidatedStrayFileAfter);

  // Inspects a single listed entry and shouts if its "validated" timestamp is
  // older than the deadline computed above.
  Future<void> inspect(BucketEntry item) async {
    if (!item.isObject) {
      throw AssertionError('there should only be objects here');
    }

    // TODO: Consider creating new wrappers for GCS, as the list API
    //       end-point includes meta-data, etc. Thus, we'd avoid this
    //       unnecessary lookup for every file.
    final info = await _bucket.tryInfo(item.name);
    if (info == null) {
      // No object info available for this entry; nothing to check.
      return;
    }
    if (!info.metadata.validated.isBefore(validatedDeadline)) {
      // Timestamp is recent enough; this file is still being maintained.
      return;
    }

    _log.pubNoticeShout(
      'stray-file',
      'The "validated" timestamp of ${item.name} indicates'
      ' that it is not being updated!',
    );
  }

  await _listBucket(inspect, prefix: prefix, delimiter: '');
}
220+
178221 Future <void > _listBucket (
179222 FutureOr <void > Function (BucketEntry entry) each, {
180223 required String prefix,
@@ -294,6 +337,31 @@ sealed class ExportedObject {
294337 }
295338}
296339
/// Key of the custom meta-data entry that holds the 'validated' timestamp.
///
/// On GCS objects this is stored as the following header:
/// ```
/// x-goog-meta-validated: <date-time>
/// ```
const String _validatedCustomHeader = 'validated';

/// Age after which the [_validatedCustomHeader] timestamp should be refreshed.
///
/// When updating a file we can compare the md5 hash; if it matches, the file
/// contents don't need to be written again. However, if the "validated"
/// timestamp is older than [_updateValidatedAfter], the meta-data still has to
/// be updated.
///
/// This lets us detect files that are present but no longer being updated.
/// Such files are classified as _stray files_ and alerts are written to logs.
const Duration _updateValidatedAfter = Duration(days: 1);

/// Age after which a file that hasn't been re-validated is considered stray.
///
/// Stray files are not deleted: there shouldn't be any, so a stray file is
/// always indicative of a bug. Nevertheless, alerts are written to logs so
/// that these inconsistencies can be detected.
const Duration _unvalidatedStrayFileAfter = Duration(days: 7);
364+
297365/// Interface for an exported JSON file.
298366///
299367/// This will write JSON as gzipped UTF-8, adding headers for
@@ -316,7 +384,7 @@ final class ExportedJsonFile<T> extends ExportedObject {
316384 contentEncoding: 'gzip' ,
317385 cacheControl: 'public, max-age=${_maxAge .inSeconds }' ,
318386 custom: {
319- 'validated' : clock.now ().toIso8601String (),
387+ _validatedCustomHeader : clock.now ().toIso8601String (),
320388 },
321389 );
322390 }
@@ -363,7 +431,7 @@ final class ExportedBlob extends ExportedObject {
363431 cacheControl: 'public, max-age=${_maxAge .inSeconds }' ,
364432 contentDisposition: 'attachment; filename="$_filename "' ,
365433 custom: {
366- 'validated' : clock.now ().toIso8601String (),
434+ _validatedCustomHeader : clock.now ().toIso8601String (),
367435 },
368436 );
369437 }
@@ -401,7 +469,7 @@ final class ExportedBlob extends ExportedObject {
401469 // we only need to update the "validated" metadata. And we only
402470 // need to update the "validated" timestamp if it's older than
403471 // _retouchDeadline
404- final retouchDeadline = clock.agoBy (_revalidateAfter );
472+ final retouchDeadline = clock.agoBy (_updateValidatedAfter );
405473 if (dstInfo.metadata.validated.isBefore (retouchDeadline)) {
406474 await _owner._bucket.updateMetadata (dst, metadata);
407475 }
@@ -422,8 +490,6 @@ final class ExportedBlob extends ExportedObject {
422490 }
423491}
424492
425- const _revalidateAfter = Duration (days: 1 );
426-
427493extension on Bucket {
428494 Future <void > writeBytesIfDifferent (
429495 String name,
@@ -432,7 +498,8 @@ extension on Bucket {
432498 ) async {
433499 if (await tryInfo (name) case final info? ) {
434500 if (info.isSameContent (bytes)) {
435- if (info.metadata.validated.isBefore (clock.agoBy (_revalidateAfter))) {
501+ if (info.metadata.validated
502+ .isBefore (clock.agoBy (_updateValidatedAfter))) {
436503 await updateMetadata (name, metadata);
437504 }
438505 return ;
@@ -468,6 +535,7 @@ extension on ObjectInfo {
468535
extension on ObjectMetadata {
  /// Timestamp read from the [_validatedCustomHeader] custom meta-data entry.
  ///
  /// Yields `DateTime(0)` when there is no custom meta-data, the entry is
  /// absent, or its value cannot be parsed as a date-time.
  DateTime get validated =>
      DateTime.tryParse(custom?[_validatedCustomHeader] ?? '') ?? DateTime(0);
}
0 commit comments