1
+ //! This module provides functionality for retrying operations with exponential backoff and jitter.
2
+ //!
3
+ //! The `RetryPolicy` struct defines the configuration for the retry behavior, including the maximum
4
+ //! number of retries, initial delay, maximum delay, and jitter.
5
+ //!
6
+ //! The `Sleep` trait (currently commented out below; retries always use `tokio::time::sleep`) is meant to
7
+ //! abstract sleeping across async runtimes such as Tokio and async-std, plus a synchronous implementation.
8
+ //!
9
+ //! The `retry_with_exponential_backoff` function retries the given operation according to the
10
+ //! specified retry policy, using exponential backoff and jitter to determine the delay between
11
+ //! retries. The function logs errors and retries the operation until it succeeds or the maximum
12
+ //! number of retries is reached.
13
+
14
+ use std:: future:: Future ;
15
+ use std:: time:: { Duration , SystemTime } ;
16
+ use opentelemetry:: otel_warn;
17
+
18
+ /// Configuration for retry policy.
19
+ #[ derive( Debug ) ]
20
+ pub ( super ) struct RetryPolicy {
21
+ /// Maximum number of retry attempts.
22
+ pub max_retries : usize ,
23
+ /// Initial delay in milliseconds before the first retry.
24
+ pub initial_delay_ms : u64 ,
25
+ /// Maximum delay in milliseconds between retries.
26
+ pub max_delay_ms : u64 ,
27
+ /// Maximum jitter in milliseconds to add to the delay.
28
+ pub jitter_ms : u64 ,
29
+ }
30
+
31
/// Generates a pseudo-random jitter value in the inclusive range `0..=max_jitter`.
///
/// The value is derived from the sub-second nanosecond component of the current
/// wall-clock time rather than from a random number generator, so the result is
/// always below one billion regardless of `max_jitter`.
///
/// This function never panics: a system clock set before the Unix epoch yields a
/// jitter of `0`, and `max_jitter == u64::MAX` is handled without overflow.
fn generate_jitter(max_jitter: u64) -> u64 {
    // `duration_since` fails when the clock reads earlier than the epoch; fall back
    // to zero nanoseconds (no jitter) instead of panicking inside a retry loop.
    let nanos = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .map_or(0, |elapsed| u64::from(elapsed.subsec_nanos()));

    // `max_jitter + 1` overflows for `u64::MAX`. In that case every possible
    // `nanos` value is already within range, so it is returned unchanged.
    match max_jitter.checked_add(1) {
        Some(modulus) => nanos % modulus,
        None => nanos,
    }
}
37
+
38
+ // /// Trait to abstract the sleep functionality.
39
+ // pub trait Sleep {
40
+ // /// The future returned by the sleep function.
41
+ // type SleepFuture: Future<Output = ()>;
42
+
43
+ // /// Sleeps for the specified duration.
44
+ // fn sleep(duration: Duration) -> Self::SleepFuture;
45
+ // }
46
+
47
+ // /// Implementation of the Sleep trait for tokio::time::Sleep
48
+ // #[cfg(feature = "rt-tokio")]
49
+ // impl Sleep for tokio::time::Sleep {
50
+ // type SleepFuture = tokio::time::Sleep;
51
+
52
+ // fn sleep(duration: Duration) -> Self::SleepFuture {
53
+ // }
54
+ // }
55
+
56
+ // #[cfg(feature = "rt-async-std")]
57
+ // /// There is no direct equivalent to `tokio::time::Sleep` in `async-std`.
58
+ // /// Instead, we create a new struct `AsyncStdSleep` and implement the `Sleep`
59
+ // /// trait for it, boxing the future returned by `async_std::task::sleep` to fit
60
+ // /// the trait's associated type requirements.
61
+ // #[derive(Debug)]
62
+ // pub struct AsyncStdSleep;
63
+
64
+ // /// Implementation of the Sleep trait for async-std
65
+ // #[cfg(feature = "rt-async-std")]
66
+ // impl Sleep for AsyncStdSleep {
67
+ // type SleepFuture = Pin<Box<dyn Future<Output = ()> + Send>>;
68
+
69
+ // fn sleep(duration: Duration) -> Self::SleepFuture {
70
+ // Box::pin(async_std::task::sleep(duration))
71
+ // }
72
+ // }
73
+
74
+ // /// Implement the Sleep trait for synchronous sleep
75
+ // #[derive(Debug)]
76
+ // pub struct StdSleep;
77
+
78
+ // impl Sleep for StdSleep {
79
+ // type SleepFuture = std::future::Ready<()>;
80
+
81
+ // fn sleep(duration: Duration) -> Self::SleepFuture {
82
+ // std::thread::sleep(duration);
83
+ // std::future::ready(())
84
+ // }
85
+ // }
86
+
87
+ /// Retries the given operation with exponential backoff and jitter.
88
+ ///
89
+ /// # Arguments
90
+ ///
91
+ /// * `policy` - The retry policy configuration.
92
+ /// * `operation_name` - The name of the operation being retried.
93
+ /// * `operation` - The operation to be retried.
94
+ ///
95
+ /// # Returns
96
+ ///
97
+ /// A `Result` containing the operation's result or an error if the maximum retries are reached.
98
+ pub ( super ) async fn retry_with_exponential_backoff < F , Fut , T , E > (
99
+ policy : RetryPolicy ,
100
+ operation_name : & str ,
101
+ mut operation : F ,
102
+ ) -> Result < T , E >
103
+ where
104
+ F : FnMut ( ) -> Fut ,
105
+ E : std:: fmt:: Debug ,
106
+ Fut : Future < Output = Result < T , E > > ,
107
+ {
108
+ let mut attempt = 0 ;
109
+ let mut delay = policy. initial_delay_ms ;
110
+
111
+ loop {
112
+ match operation ( ) . await {
113
+ Ok ( result) => return Ok ( result) , // Return the result if the operation succeeds
114
+ Err ( err) if attempt < policy. max_retries => {
115
+ attempt += 1 ;
116
+ // Log the error and retry after a delay with jitter
117
+ otel_warn ! ( name: "OtlpRetry" , message = format!( "Retrying operation {:?} due to error: {:?}" , operation_name, err) ) ;
118
+ let jitter = generate_jitter ( policy. jitter_ms ) ;
119
+ let delay_with_jitter = std:: cmp:: min ( delay + jitter, policy. max_delay_ms ) ;
120
+
121
+ // Retry currently only supports tokio::time::sleep (for use with gRPC/tonic). This
122
+ // should be replaced with a more generic sleep function that works with async-std
123
+ // and a synchronous runtime in the future.
124
+ tokio:: time:: sleep ( Duration :: from_millis ( delay_with_jitter) ) . await ;
125
+
126
+ delay = std:: cmp:: min ( delay * 2 , policy. max_delay_ms ) ; // Exponential backoff
127
+ }
128
+ Err ( err) => return Err ( err) , // Return the error if max retries are reached
129
+ }
130
+ }
131
+ }
132
+
133
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::Duration;
    use tokio::time::timeout;

    /// Builds the policy shared by these tests; only the retry budget varies.
    fn policy_with(max_retries: usize) -> RetryPolicy {
        RetryPolicy {
            max_retries,
            initial_delay_ms: 100,
            max_delay_ms: 1600,
            jitter_ms: 100,
        }
    }

    /// The jitter must never exceed the requested upper bound.
    #[tokio::test]
    async fn test_generate_jitter() {
        const MAX_JITTER: u64 = 100;
        assert!(generate_jitter(MAX_JITTER) <= MAX_JITTER);
    }

    /// An operation that succeeds immediately is returned without any retry.
    #[tokio::test]
    async fn test_retry_with_exponential_backoff_success() {
        let outcome = retry_with_exponential_backoff(policy_with(3), "test_operation", || async {
            Ok::<_, ()>("success")
        })
        .await;

        assert_eq!(outcome, Ok("success"));
    }

    /// Two failures followed by a success yield `Ok` after exactly three calls.
    #[tokio::test]
    async fn test_retry_with_exponential_backoff_retries() {
        let calls = AtomicUsize::new(0);

        let outcome = retry_with_exponential_backoff(policy_with(3), "test_operation", || {
            let call_index = calls.fetch_add(1, Ordering::SeqCst);
            async move {
                // Calls 0 and 1 fail; call 2 (the third attempt) succeeds.
                match call_index {
                    0 | 1 => Err::<&str, &str>("error"),
                    _ => Ok::<&str, &str>("success"),
                }
            }
        })
        .await;

        assert_eq!(outcome, Ok("success"));
        assert_eq!(calls.load(Ordering::SeqCst), 3);
    }

    /// A permanently failing operation is tried once plus `max_retries` times.
    #[tokio::test]
    async fn test_retry_with_exponential_backoff_failure() {
        let calls = AtomicUsize::new(0);

        let outcome = retry_with_exponential_backoff(policy_with(3), "test_operation", || {
            calls.fetch_add(1, Ordering::SeqCst);
            async { Err::<(), _>("error") }
        })
        .await;

        assert_eq!(outcome, Err("error"));
        // Initial attempt + 3 retries.
        assert_eq!(calls.load(Ordering::SeqCst), 4);
    }

    /// With a large retry budget the backoff outlasts a one-second timeout.
    #[tokio::test]
    async fn test_retry_with_exponential_backoff_timeout() {
        let always_failing =
            retry_with_exponential_backoff(policy_with(12), "test_operation", || async {
                Err::<(), _>("error")
            });

        let outcome = timeout(Duration::from_secs(1), always_failing).await;

        // `timeout` reports `Err` when the deadline elapses first.
        assert!(outcome.is_err());
    }
}
0 commit comments