77//! specified retry policy, using exponential backoff and jitter to determine the delay between
88//! retries. The function uses error classification to determine retry behavior and can honor
99//! server-provided throttling hints.
10+ #[ cfg( any(
11+ feature = "experimental-grpc-retry" ,
12+ feature = "experimental-http-retry"
13+ ) ) ]
14+ use opentelemetry:: { otel_debug, otel_info} ;
1015
1116#[ cfg( any(
1217 feature = "experimental-grpc-retry" ,
@@ -17,24 +22,23 @@ use opentelemetry::otel_warn;
1722 feature = "experimental-grpc-retry" ,
1823 feature = "experimental-http-retry"
1924) ) ]
20- use std :: future :: Future ;
25+ use opentelemetry_sdk :: runtime :: Runtime ;
2126#[ cfg( any(
2227 feature = "experimental-grpc-retry" ,
2328 feature = "experimental-http-retry"
2429) ) ]
25- use std:: hash:: { DefaultHasher , Hasher } ;
26- use std:: time:: Duration ;
30+ use std:: future:: Future ;
2731#[ cfg( any(
2832 feature = "experimental-grpc-retry" ,
2933 feature = "experimental-http-retry"
3034) ) ]
31- use std:: time :: SystemTime ;
32-
35+ use std:: hash :: { DefaultHasher , Hasher } ;
36+ use std :: time :: Duration ;
3337#[ cfg( any(
3438 feature = "experimental-grpc-retry" ,
3539 feature = "experimental-http-retry"
3640) ) ]
37- use opentelemetry_sdk :: runtime :: Runtime ;
41+ use std :: time :: SystemTime ;
3842
3943/// Classification of errors for retry purposes.
4044#[ derive( Debug , Clone , PartialEq ) ]
@@ -61,26 +65,6 @@ pub struct RetryPolicy {
6165 pub jitter_ms : u64 ,
6266}
6367
64- /// A runtime stub for when experimental_async_runtime is not enabled.
65- /// This allows retry policy to be configured but no actual retries occur.
66- #[ cfg( not( any(
67- feature = "experimental-grpc-retry" ,
68- feature = "experimental-http-retry"
69- ) ) ) ]
70- #[ derive( Debug , Clone , Default ) ]
71- pub struct NoOpRuntime ;
72-
73- #[ cfg( not( any(
74- feature = "experimental-grpc-retry" ,
75- feature = "experimental-http-retry"
76- ) ) ) ]
77- impl NoOpRuntime {
78- /// Creates a new no-op runtime.
79- pub fn new ( ) -> Self {
80- Self
81- }
82- }
83-
8468// Generates a random jitter value up to max_jitter
8569#[ cfg( any(
8670 feature = "experimental-grpc-retry" ,
@@ -144,15 +128,23 @@ where
144128
145129 match error_type {
146130 RetryErrorType :: NonRetryable => {
147- otel_warn ! ( name: "OtlpRetry" , message = format!( "Operation {:?} failed with non-retryable error: {:?}" , operation_name, err) ) ;
131+ otel_warn ! ( name: "Export.Failed.NonRetryable" ,
132+ operation = operation_name,
133+ message = "OTLP export failed with non-retryable error - telemetry data will be lost" ) ;
148134 return Err ( err) ;
149135 }
150136 RetryErrorType :: Retryable if attempt < policy. max_retries => {
151137 attempt += 1 ;
152138 // Use exponential backoff with jitter
153- otel_warn ! ( name: "OtlpRetry" , message = format!( "Retrying operation {:?} due to retryable error: {:?}" , operation_name, err) ) ;
154139 let jitter = generate_jitter ( policy. jitter_ms ) ;
155140 let delay_with_jitter = std:: cmp:: min ( delay + jitter, policy. max_delay_ms ) ;
141+ otel_debug ! ( name: "Export.InProgress.Retrying" ,
142+ operation = operation_name,
143+ attempt = attempt,
144+ delay_ms = delay_with_jitter,
145+ jitter_ms = jitter,
146+ message = "OTLP export failed with retryable error - retrying"
147+ ) ;
156148 runtime
157149 . delay ( Duration :: from_millis ( delay_with_jitter) )
158150 . await ;
@@ -161,13 +153,22 @@ where
161153 RetryErrorType :: Throttled ( server_delay) if attempt < policy. max_retries => {
162154 attempt += 1 ;
163155 // Use server-specified delay (overrides exponential backoff)
164- otel_warn ! ( name: "OtlpRetry" , message = format!( "Retrying operation {:?} after server-specified throttling delay: {:?}" , operation_name, server_delay) ) ;
156+ otel_info ! ( name: "Export.InProgress.Throttled" ,
157+ operation = operation_name,
158+ attempt = attempt,
159+ delay_ms = server_delay. as_millis( ) ,
160+ message = "OTLP export throttled by OTLP endpoint - delaying and retrying"
161+ ) ;
165162 runtime. delay ( server_delay) . await ;
166163 // Don't update exponential backoff delay for next attempt since server provided specific timing
167164 }
168165 _ => {
169166 // Max retries reached
170- otel_warn ! ( name: "OtlpRetry" , message = format!( "Operation {:?} failed after {} attempts: {:?}" , operation_name, attempt, err) ) ;
167+ otel_warn ! ( name: "Export.Failed.Exhausted" ,
168+ operation = operation_name,
169+ retries = attempt,
170+ message = "OTLP export exhausted retries - telemetry data will be lost"
171+ ) ;
171172 return Err ( err) ;
172173 }
173174 }
0 commit comments