@@ -14,12 +14,17 @@ use http::Method;
1414use reqwest:: Client ;
1515use serde:: de:: DeserializeOwned ;
1616use serde:: Serialize ;
17+ use tokio:: time;
1718use tracing:: debug;
1819
1920use crate :: api:: * ;
2021
/// NOTE(review): presumably the timeout applied to a single API request; its
/// use is not visible in this section, so confirm against the request code.
2122const API_REQUEST_TIMEOUT : Duration = Duration :: from_secs ( 5 ) ;
/// Attempt budget for one API call. The retry loop makes `MAX_ATTEMPTS - 1`
/// attempts, returning early on the first success; the handling of the final
/// attempt lies outside this section (TODO confirm).
2223const MAX_ATTEMPTS : usize = 3 ;
24+ /// Period of the retry timer, which is created once before the first attempt, so
25+ /// the wait after the second failed API request is at most this long. This is
26+ /// relatively long since a second failure probably means the backend is down.
27+ const RETRY_INTERVAL : Duration = Duration :: from_secs ( 15 ) ;
2328
2429/// Enumerates the base urls that can be used in an API call.
2530#[ derive( Copy , Clone ) ]
@@ -174,11 +179,21 @@ impl LexeApiClient {
174179 endpoint : & str ,
175180 data : & D ,
176181 ) -> Result < T , ApiError > {
182+ let mut retry_timer = time:: interval ( RETRY_INTERVAL ) ;
183+
177184 // Try the first n-1 times, return early if successful
178185 for _ in 0 ..MAX_ATTEMPTS - 1 {
179186 let res = self . execute ( method, base, ver, endpoint, data) . await ;
180187 if res. is_ok ( ) {
181188 return res;
189+ } else {
190+ // The first tick resolves immediately and we tick only on failures,
191+ // so the first failed attempt is immediately followed by a second
192+ // attempt (messages are sometimes dropped during normal operation).
193+ // Later ticks fire at multiples of RETRY_INTERVAL counted from when
194+ // the timer was created, so subsequent retries wait up to that long
195+ // (the node backend is probably down, so a long wait is appropriate).
196+ retry_timer. tick ( ) . await ;
182197 }
183198 }
184199
0 commit comments