@@ -996,22 +996,9 @@ class UpdateMachine {
     }());
   }
 
-  // This is static so that it persists through new UpdateMachine instances
-  // as we attempt to fix things by reloading data from scratch. In principle
-  // it could also be per-account (or per-realm or per-server); but currently
-  // we skip that complication, as well as attempting to reset backoff on
-  // later success. After all, these unexpected errors should be uncommon;
-  // ideally they'd never happen.
-  static BackoffMachine get _unexpectedErrorBackoffMachine {
-    return __unexpectedErrorBackoffMachine
-      ??= BackoffMachine(maxBound: const Duration(seconds: 60));
-  }
-  static BackoffMachine? __unexpectedErrorBackoffMachine;
-
   void poll() async {
     assert(!_disposed);
     try {
-      BackoffMachine? backoffMachine;
       while (true) {
         if (_debugLoopSignal != null) {
           await _debugLoopSignal!.future;
@@ -1027,34 +1014,13 @@ class UpdateMachine {
           result = await getEvents(store.connection,
             queueId: queueId, lastEventId: lastEventId);
           if (_disposed) return;
-        } catch (e) {
+        } catch (e, stackTrace) {
           if (_disposed) return;
-          final shouldRetry = _triagePollRequestError(e);
-          if (!shouldRetry) rethrow;
-          await (backoffMachine ??= BackoffMachine()).wait();
+          await _handlePollRequestError(e, stackTrace); // may rethrow
           if (_disposed) return;
-          assert(debugLog('… Backoff wait complete, retrying poll.'));
           continue;
         }
-
-        // After one successful request, we reset backoff to its initial state.
-        // That way if the user is off the network and comes back on, the app
-        // doesn't wind up in a state where it's slow to recover the next time
-        // one request fails.
-        //
-        // This does mean that if the server is having trouble and handling some
-        // but not all of its requests, we'll end up doing a lot more retries than
-        // if we stayed at the max backoff interval; partway toward what would
-        // happen if we weren't backing off at all.
-        //
-        // But at least for [getEvents] requests, as here, it should be OK,
-        // because this is a long-poll. That means a typical successful request
-        // takes a long time to come back; in fact longer than our max backoff
-        // duration (which is 10 seconds). So if we're getting a mix of successes
-        // and failures, the successes themselves should space out the requests.
-        backoffMachine = null;
-
-        _clearReportingErrorsToUser();
+        _clearPollErrors();
 
         final events = result.events;
         for (final event in events) {
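With triage and backoff folded into `_handlePollRequestError`, the loop's contract becomes: the helper either rethrows (escalating to the outer catch handled in the next hunk) or waits out a backoff and returns, after which the loop `continue`s into the next attempt. A minimal runnable sketch of that shape, where `fetchOnce`, `TransientException`, and `handleRequestError` are hypothetical stand-ins for `getEvents` and the real triage:

```dart
class TransientException implements Exception {}

int _attempts = 0;

// Hypothetical stand-in for getEvents: fails twice, then succeeds.
Future<String> fetchOnce() async {
  if (++_attempts < 3) throw TransientException();
  return 'ok';
}

// Stand-in for _handlePollRequestError: rethrows fatal errors with the
// original stack trace, or waits out a (fixed, for brevity) backoff.
Future<void> handleRequestError(Object e, StackTrace st) async {
  if (e is! TransientException) Error.throwWithStackTrace(e, st);
  await Future<void>.delayed(const Duration(milliseconds: 100));
}

Future<void> pollLoop() async {
  while (true) {
    String result;
    try {
      result = await fetchOnce();
    } catch (e, st) {
      await handleRequestError(e, st); // may rethrow
      continue; // transient error: retry the request
    }
    print(result); // success; the real loop also resets backoff here
    return;
  }
}

Future<void> main() => pollLoop();
```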
@@ -1073,29 +1039,26 @@ class UpdateMachine {
       }
     } catch (e) {
       if (_disposed) return;
-
-      // An error occurred, other than the transient request errors we retry on.
-      // This means either a lost/expired event queue on the server (which is
-      // normal after the app is offline for a period like 10 minutes),
-      // or an unexpected exception representing a bug in our code or the server.
-      // Either way, the show must go on. So reload server data from scratch.
-
-      final isUnexpected = _triagePollError(e);
-
-      if (isUnexpected) {
-        // We don't know the cause of the failure; it might well keep happening.
-        // Avoid creating a retry storm.
-        await _unexpectedErrorBackoffMachine.wait();
-        if (_disposed) return;
-      }
-
-      // This disposes the store, which disposes this update machine.
-      await store._globalStore._reloadPerAccount(store.accountId);
-      assert(debugLog('… Event queue replaced.'));
+      await _handlePollError(e);
+      assert(_disposed);
       return;
     }
   }
 
+  // This is static so that it persists through new UpdateMachine instances
+  // as we attempt to fix things by reloading data from scratch. In principle
+  // it could also be per-account (or per-realm or per-server); but currently
+  // we skip that complication, as well as attempting to reset backoff on
+  // later success. After all, these unexpected errors should be uncommon;
+  // ideally they'd never happen.
+  static BackoffMachine get _unexpectedErrorBackoffMachine {
+    return __unexpectedErrorBackoffMachine
+      ??= BackoffMachine(maxBound: const Duration(seconds: 60));
+  }
+  static BackoffMachine? __unexpectedErrorBackoffMachine;
+
+  BackoffMachine? _pollBackoffMachine;
+
   /// This controls when we start to report transient errors to the user when
   /// polling.
   ///
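The getter moved in this hunk is a lazily-initialized static: the backing field starts out null, and `??=` (Dart's null-aware assignment) creates the machine on first access. Because the field is static, the accumulated backoff state outlives any one `UpdateMachine`, which is exactly what the comment above it asks for. A standalone sketch of the pattern, substituting a hypothetical `Counter` for `BackoffMachine`:

```dart
class Counter {
  int value = 0;
}

class Machine {
  // Lazily-initialized static, shared across all instances:
  // created on first access via the null-aware assignment `??=`.
  static Counter get _shared => __shared ??= Counter();
  static Counter? __shared;

  void bump() => _shared.value++;
}

void main() {
  Machine().bump();
  Machine().bump(); // a brand-new instance sees the same counter
  print(Machine._shared.value); // prints: 2
}
```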
@@ -1105,24 +1068,49 @@ class UpdateMachine {
 
   int _accumulatedTransientFailureCount = 0;
 
-  void _clearReportingErrorsToUser() {
+  void _clearPollErrors() {
+    // After one successful request, we reset backoff to its initial state.
+    // That way if the user is off the network and comes back on, the app
+    // doesn't wind up in a state where it's slow to recover the next time
+    // one request fails.
+    //
+    // This does mean that if the server is having trouble and handling some
+    // but not all of its requests, we'll end up doing a lot more retries than
+    // if we stayed at the max backoff interval; partway toward what would
+    // happen if we weren't backing off at all.
+    //
+    // But at least for [getEvents] requests, as here, it should be OK,
+    // because this is a long-poll. That means a typical successful request
+    // takes a long time to come back; in fact longer than our max backoff
+    // duration (which is 10 seconds). So if we're getting a mix of successes
+    // and failures, the successes themselves should space out the requests.
+    _pollBackoffMachine = null;
+
     store.isLoading = false;
     _accumulatedTransientFailureCount = 0;
     reportErrorToUserBriefly(null);
   }
 
-  /// Sort out an error from the network request in [poll].
+  /// Sort out an error from the network request in [poll]:
+  /// either wait for a backoff duration (and possibly report the error),
+  /// or rethrow.
   ///
-  /// If the request should be retried, this method returns true,
+  /// If the request should be retried, this method uses [_pollBackoffMachine]
+  /// to wait an appropriate backoff duration for that retry,
   /// after reporting the error if appropriate to the user and/or developer.
-  /// Otherwise, this method returns false with no side effects.
-  bool _triagePollRequestError(Object error) {
+  ///
+  /// Otherwise this method rethrows the error, with no other side effects.
+  ///
+  /// See also:
+  ///  * [_handlePollError], which handles errors from the rest of [poll]
+  ///    and errors this method rethrows.
+  Future<void> _handlePollRequestError(Object error, StackTrace stackTrace) async {
     store.isLoading = true;
 
     if (error is! ApiRequestException) {
       // Some unexpected error, outside even making the HTTP request.
       // Definitely a bug in our code.
-      return false;
+      Error.throwWithStackTrace(error, stackTrace);
     }
 
     bool shouldReportToUser;
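When the triage helper returned a bool, the caller's `rethrow` preserved the original stack trace for free, but `rethrow` is only legal inside the catch clause that caught the error. Now that the throw happens inside the helper, a plain `throw error` would start a fresh trace at the helper, so the new code takes the trace as a parameter and uses `Error.throwWithStackTrace` (available since Dart 2.16) to rethrow with the trace captured at the original throw site. A small demonstration:

```dart
void failDeepInside() => throw StateError('boom');

Never escalate(Object error, StackTrace stackTrace) {
  // `throw error;` here would begin a new trace at this line;
  // throwWithStackTrace rethrows with the trace passed in.
  Error.throwWithStackTrace(error, stackTrace);
}

void main() {
  try {
    failDeepInside();
  } catch (e, st) {
    escalate(e, st); // the uncaught error still points at failDeepInside
  }
}
```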
@@ -1142,30 +1130,40 @@ class UpdateMachine {
         shouldReportToUser = true;
 
       case ZulipApiException(code: 'BAD_EVENT_QUEUE_ID'):
-        return false;
+        Error.throwWithStackTrace(error, stackTrace);
 
       case ZulipApiException():
       case MalformedServerResponseException():
         // Either a 4xx we didn't expect, or a malformed response;
         // in either case, a mismatch of the client's expectations to the
         // server's behavior, and therefore a bug in one or the other.
         // TODO(#1054) handle auth failures specifically
-        return false;
+        Error.throwWithStackTrace(error, stackTrace);
     }
 
     assert(debugLog('Transient error polling event queue for $store: $error\n'
       'Backing off, then will retry…'));
     if (shouldReportToUser) {
       _maybeReportToUserTransientError(error);
     }
-    return true;
+    await (_pollBackoffMachine ??= BackoffMachine()).wait();
+    if (_disposed) return;
+    assert(debugLog('… Backoff wait complete, retrying poll.'));
   }
 
-  /// Sort out an error in [poll].
+  /// Deal with an error in [poll]: reload server data to replace the store,
+  /// after reporting the error as appropriate to the user and/or developer.
   ///
-  /// Reports the error if appropriate to the user and/or developer;
-  /// then returns true just if the error was unexpected.
-  bool _triagePollError(Object error) {
+  /// See also:
+  ///  * [_handlePollRequestError], which handles certain errors
+  ///    and causes them not to reach this method.
+  Future<void> _handlePollError(Object error) async {
+    // An error occurred, other than the transient request errors we retry on.
+    // This means either a lost/expired event queue on the server (which is
+    // normal after the app is offline for a period like 10 minutes),
+    // or an unexpected exception representing a bug in our code or the server.
+    // Either way, the show must go on. So reload server data from scratch.
+
     store.isLoading = true;
 
     bool isUnexpected;
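Both machines here expose the same basic behavior: each `wait()` sleeps for a duration that grows toward a cap, 10 seconds by default and 60 seconds for the unexpected-error machine added above. The real `BackoffMachine` is defined elsewhere in this repo; as a rough sketch of the behavior this code relies on (the jitter detail is an assumption about the real class, not taken from this diff):

```dart
import 'dart:math';

/// A rough sketch only, not the app's actual BackoffMachine.
class BackoffSketch {
  BackoffSketch({this.maxBound = const Duration(seconds: 10)});

  final Duration maxBound;
  Duration _bound = const Duration(milliseconds: 100);

  Future<void> wait() async {
    // Sleep a random duration up to the current bound ("full jitter"),
    // so many clients failing together don't retry in lockstep.
    final ms = Random().nextInt(_bound.inMilliseconds + 1);
    await Future<void>.delayed(Duration(milliseconds: ms));
    _bound *= 2; // exponential growth, capped at maxBound
    if (_bound > maxBound) _bound = maxBound;
  }
}
```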
@@ -1202,7 +1200,17 @@ class UpdateMachine {
         // but our remedy is the same either way.
         isUnexpected = true;
     }
-    return isUnexpected;
+
+    if (isUnexpected) {
+      // We don't know the cause of the failure; it might well keep happening.
+      // Avoid creating a retry storm.
+      await _unexpectedErrorBackoffMachine.wait();
+      if (_disposed) return;
+    }
+
+    await store._globalStore._reloadPerAccount(store.accountId);
+    assert(_disposed);
+    assert(debugLog('… Event queue replaced.'));
   }
 
   /// This only reports transient errors after reaching
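The doc comment cut off by this hunk boundary refers to the counter reset in `_clearPollErrors` above: transient failures accumulate, and only once they persist past a threshold are they surfaced to the user. A hypothetical sketch of that gating (the threshold name and value are assumptions, not the app's actual ones):

```dart
// Hypothetical threshold; the real value lives in the full file.
const int notifyThreshold = 4;

int _accumulatedTransientFailureCount = 0;

void onTransientFailure(void Function(String) reportToUser) {
  _accumulatedTransientFailureCount++;
  if (_accumulatedTransientFailureCount > notifyThreshold) {
    reportToUser('Connection problem; will retry.');
  }
}

void onPollSuccess() {
  _accumulatedTransientFailureCount = 0; // as in _clearPollErrors
}
```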