Skip to content

Commit 4229317

Browse files
authored
Stray file detection (#8200)
* Stray file detection * Fix assets
1 parent b68c32c commit 4229317

File tree

1 file changed

+75
-7
lines changed

1 file changed

+75
-7
lines changed

app/lib/package/api_export/exported_api.dart

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,18 @@ final class ExportedApi {
7676
/// This will remove prefixes other than `latest/` where [shouldGCVersion]
7777
/// returns true.
7878
Future<void> garbageCollect(Set<String> allPackageNames) async {
79+
_log.info(
80+
'Garbage collection started, with ${allPackageNames.length} package names',
81+
);
7982
await Future.wait([
8083
_gcOldPrefixes(),
8184
..._prefixes.map((prefix) => _gcPrefix(prefix, allPackageNames)),
8285
]);
86+
// Check if there are any stray files left after we've done a full GC cycle.
87+
await Future.wait([
88+
..._prefixes.map((prefix) => _findStrayFiles(prefix)),
89+
]);
90+
_log.info('Garbage collection completed.');
8391
}
8492

8593
/// Garbage collect unknown packages from [prefix].
@@ -175,6 +183,41 @@ final class ExportedApi {
175183
}));
176184
}
177185

186+
/// Search for stray files in [prefix]
187+
///
188+
/// We detect stray files by looking at the [_validatedCustomHeader].
189+
/// Whenever we save a file we update the [_validatedCustomHeader] timestamp,
190+
/// if it's older than [_updateValidatedAfter]. Thus, if something hasn't
191+
/// been updated for [_unvalidatedStrayFileAfter], then it's probably a stray
192+
/// file that we don't understand.
193+
///
194+
/// If there are stray files we don't really dare to delete them. They could
195+
/// be introduced by a newer [runtimeVersion]. Or it could be a bug, but if that's
196+
/// the case, what are the implications of deleting such files?
197+
/// In all such cases, it's best to alert and leave deletion of files as a bug to
198+
/// be fixed.
199+
Future<void> _findStrayFiles(String prefix) async {
200+
final validatedDeadline = clock.agoBy(_unvalidatedStrayFileAfter);
201+
await _listBucket(prefix: prefix, delimiter: '', (item) async {
202+
if (!item.isObject) {
203+
throw AssertionError('there should only be objects here');
204+
}
205+
206+
// TODO: Consider creating new wrappers for GCS, as the list API
207+
// end-point includes meta-data, etc. Thus, we'd avoid this unnecessary
208+
// lookup for every file.
209+
if (await _bucket.tryInfo(item.name) case final info?) {
210+
if (info.metadata.validated.isBefore(validatedDeadline)) {
211+
_log.pubNoticeShout(
212+
'stray-file',
213+
'The "validated" timestamp of ${item.name} indicates'
214+
' that it is not being updated!',
215+
);
216+
}
217+
}
218+
});
219+
}
220+
178221
Future<void> _listBucket(
179222
FutureOr<void> Function(BucketEntry entry) each, {
180223
required String prefix,
@@ -294,6 +337,31 @@ sealed class ExportedObject {
294337
}
295338
}
296339

340+
/// Custom meta-data key for the 'validated' field.
341+
///
342+
/// This will be stored on GCS objects as the following header:
343+
/// ```
344+
/// x-goog-meta-validated: <date-time>
345+
/// ```
346+
const _validatedCustomHeader = 'validated';
347+
348+
/// Duration after which the [_validatedCustomHeader] should be updated.
349+
///
350+
/// When updating a file, we can check the md5 hash, if it matches we don't need
351+
/// to update the file. But if the "validated" timestamp is older than
352+
/// [_updateValidatedAfter], then we have to update the meta-data.
353+
///
354+
/// This allows us to detect files that are present, but not being updated
355+
/// anymore. We classify such files as _stray files_ and write alerts to logs.
356+
const _updateValidatedAfter = Duration(days: 1);
357+
358+
/// Duration after which a file that hasn't been updated is considered stray!
359+
///
360+
/// We don't delete stray files, because there shouldn't be any, so a stray file
361+
/// is always indicative of a bug. Nevertheless, we write alerts to logs, so
362+
/// that these inconsistencies can be detected.
363+
const _unvalidatedStrayFileAfter = Duration(days: 7);
364+
297365
/// Interface for an exported JSON file.
298366
///
299367
/// This will write JSON as gzipped UTF-8, adding headers for
@@ -316,7 +384,7 @@ final class ExportedJsonFile<T> extends ExportedObject {
316384
contentEncoding: 'gzip',
317385
cacheControl: 'public, max-age=${_maxAge.inSeconds}',
318386
custom: {
319-
'validated': clock.now().toIso8601String(),
387+
_validatedCustomHeader: clock.now().toIso8601String(),
320388
},
321389
);
322390
}
@@ -363,7 +431,7 @@ final class ExportedBlob extends ExportedObject {
363431
cacheControl: 'public, max-age=${_maxAge.inSeconds}',
364432
contentDisposition: 'attachment; filename="$_filename"',
365433
custom: {
366-
'validated': clock.now().toIso8601String(),
434+
_validatedCustomHeader: clock.now().toIso8601String(),
367435
},
368436
);
369437
}
@@ -401,7 +469,7 @@ final class ExportedBlob extends ExportedObject {
401469
// we only need to update the "validated" metadata. And we only
402470
// need to update the "validated" timestamp if it's older than
403471
// retouchDeadline
404-
final retouchDeadline = clock.agoBy(_revalidateAfter);
472+
final retouchDeadline = clock.agoBy(_updateValidatedAfter);
405473
if (dstInfo.metadata.validated.isBefore(retouchDeadline)) {
406474
await _owner._bucket.updateMetadata(dst, metadata);
407475
}
@@ -422,8 +490,6 @@ final class ExportedBlob extends ExportedObject {
422490
}
423491
}
424492

425-
const _revalidateAfter = Duration(days: 1);
426-
427493
extension on Bucket {
428494
Future<void> writeBytesIfDifferent(
429495
String name,
@@ -432,7 +498,8 @@ extension on Bucket {
432498
) async {
433499
if (await tryInfo(name) case final info?) {
434500
if (info.isSameContent(bytes)) {
435-
if (info.metadata.validated.isBefore(clock.agoBy(_revalidateAfter))) {
501+
if (info.metadata.validated
502+
.isBefore(clock.agoBy(_updateValidatedAfter))) {
436503
await updateMetadata(name, metadata);
437504
}
438505
return;
@@ -468,6 +535,7 @@ extension on ObjectInfo {
468535

469536
extension on ObjectMetadata {
470537
DateTime get validated {
471-
return DateTime.tryParse(custom?['validated'] ?? '') ?? DateTime(0);
538+
final validatedHeader = custom?[_validatedCustomHeader] ?? '';
539+
return DateTime.tryParse(validatedHeader) ?? DateTime(0);
472540
}
473541
}

0 commit comments

Comments
 (0)