@@ -19,6 +19,7 @@ import 'package:meta/meta.dart';
1919// ignore: implementation_imports
2020import 'package:pana/src/dartdoc/pub_dartdoc_data.dart' ;
2121import 'package:pool/pool.dart' ;
22+ import 'package:pub_dev/shared/monitoring.dart' ;
2223import 'package:retry/retry.dart' ;
2324
2425import '../../publisher/backend.dart' ;
@@ -55,6 +56,10 @@ final Logger _logger = Logger('pub.search.backend');
5556/// building or updating the snapshot.
5657const _defaultSnapshotBuildConcurrency = 8 ;
5758
59+ /// The (approximate) maximum amount of time the process holds the lock
60+ /// while it works on index building and updates.
61+ const _maxLockHoldPeriod = Duration (days: 1 );
62+
5863/// Registers [backend] as the search backend service.
5964void registerSearchBackend (SearchBackend backend) =>
6065 ss.register (#_searchBackend, backend);
@@ -101,15 +106,30 @@ class SearchBackend {
101106 '$runtimeVersion /search/update-snapshot' ,
102107 expiration: Duration (minutes: 20 ),
103108 );
109+ final started = clock.now ();
104110 while (true ) {
105111 try {
106112 await lock.withClaim ((claim) async {
107- await doCreateAndUpdateSnapshot (claim);
113+ // Force timeout exception if the process does not release the lock.
114+ final lockHoldTimeout = _maxLockHoldPeriod + Duration (hours: 1 );
115+ await doCreateAndUpdateSnapshot (claim).timeout (lockHoldTimeout);
108116 });
109117 } catch (e, st) {
110- _logger.warning ('Snapshot update failed.' , e, st);
118+ _logger.pubNoticeShout (
119+ 'snapshot-building' , 'Snapshot update failed.' , e, st);
120+ // Force waiting at least an hour before we rethrow the exception,
121+ // otherwise we could get into a reboot loop that doesn't get much
122+ // real work done on the other tasks.
123+ final elapsed = clock.now ().difference (started);
124+ if (elapsed < Duration (hours: 1 )) {
125+ _logger.warning ('Waiting before rethrowing exception.' , e, st);
126+ await Future .delayed (Duration (hours: 1 ) - elapsed);
127+ }
128+ // Throwing here will crash the VM and force the instance to restart.
129+ rethrow ;
111130 }
112- // Wait for 1 minutes for sanity, before trying again.
131+
132+ // Allow another instance to get the lock and build the index.
113133 await Future .delayed (Duration (minutes: 1 ));
114134 }
115135 }
@@ -124,7 +144,7 @@ class SearchBackend {
124144
125145 // The claim will be released after _maxLockHoldPeriod (a day); another
126146 // process may then start to build the snapshot from scratch again.
127- final workUntil = clock.now ().add (Duration (days : 1 ) );
147+ final workUntil = clock.now ().add (_maxLockHoldPeriod );
128148
129149 // creating snapshot from scratch
130150 final snapshot = SearchSnapshot ();
0 commit comments