@@ -14,8 +14,9 @@ use opentelemetry_proto::transform::logs::tonic::group_logs_by_resource_and_scop
1414
1515use super :: BoxInterceptor ;
1616
17- use opentelemetry_sdk:: retry:: { retry_with_exponential_backoff, RetryPolicy } ;
18- use opentelemetry_sdk:: runtime:: Tokio ;
17+ use opentelemetry_sdk:: retry:: { RetryPolicy , RetryErrorType } ;
18+ use opentelemetry_sdk:: runtime:: { Tokio , Runtime } ;
19+ use crate :: retry_classification:: grpc:: classify_tonic_status;
1920
2021pub ( crate ) struct TonicLogsClient {
2122 inner : Mutex < Option < ClientInner > > ,
@@ -71,54 +72,88 @@ impl LogExporter for TonicLogsClient {
7172
7273 let batch = Arc :: new ( batch) ;
7374
74- retry_with_exponential_backoff ( Tokio , policy, "TonicLogsClient.Export" , {
75- let batch = Arc :: clone ( & batch) ;
76- let inner = & self . inner ;
77- let resource = & self . resource ;
78- move || {
79- let batch = Arc :: clone ( & batch) ;
80- Box :: pin ( async move {
81- let ( mut client, metadata, extensions) = match inner. lock ( ) . await . as_mut ( ) {
82- Some ( inner) => {
83- let ( m, e, _) = inner
84- . interceptor
85- . call ( Request :: new ( ( ) ) )
86- . map_err ( |e| {
87- OTelSdkError :: InternalFailure ( format ! ( "error: {e:?}" ) )
88- } ) ?
89- . into_parts ( ) ;
90- ( inner. client . clone ( ) , m, e)
75+ // Custom retry loop that preserves tonic::Status for proper classification
76+ let mut attempt = 0 ;
77+ let mut delay = policy. initial_delay_ms ;
78+
79+ loop {
80+ let batch_clone = Arc :: clone ( & batch) ;
81+
82+ // Execute the export operation
83+ let result = {
84+ let ( mut client, metadata, extensions) = match self . inner . lock ( ) . await . as_mut ( ) {
85+ Some ( inner) => {
86+ let ( m, e, _) = inner
87+ . interceptor
88+ . call ( Request :: new ( ( ) ) )
89+ . map_err ( |e| {
90+ OTelSdkError :: InternalFailure ( format ! ( "error: {e:?}" ) )
91+ } ) ?
92+ . into_parts ( ) ;
93+ ( inner. client . clone ( ) , m, e)
94+ }
95+ None => return Err ( OTelSdkError :: AlreadyShutdown ) ,
96+ } ;
97+
98+ let resource_logs = group_logs_by_resource_and_scope ( & batch_clone, & self . resource ) ;
99+
100+ otel_debug ! ( name: "TonicLogsClient.ExportStarted" ) ;
101+
102+ client
103+ . export ( Request :: from_parts (
104+ metadata,
105+ extensions,
106+ ExportLogsServiceRequest { resource_logs } ,
107+ ) )
108+ . await
109+ } ;
110+
111+ match result {
112+ Ok ( _) => {
113+ otel_debug ! ( name: "TonicLogsClient.ExportSucceeded" ) ;
114+ return Ok ( ( ) ) ;
115+ }
116+ Err ( tonic_status) => {
117+ // ✅ PROPER STRUCTURED ERROR HANDLING
118+ // Classify the tonic::Status directly with structured data
119+ let error_classification = classify_tonic_status ( & tonic_status) ;
120+
121+ match error_classification {
122+ RetryErrorType :: NonRetryable => {
123+ let error = format ! ( "export error: {tonic_status:?}" ) ;
124+ otel_debug ! ( name: "TonicLogsClient.ExportFailed" , error = & error) ;
125+ return Err ( OTelSdkError :: InternalFailure ( error) ) ;
91126 }
92- None => return Err ( OTelSdkError :: AlreadyShutdown ) ,
93- } ;
94-
95- let resource_logs = group_logs_by_resource_and_scope ( & batch, resource) ;
96-
97- otel_debug ! ( name: "TonicLogsClient.ExportStarted" ) ;
98-
99- let result = client
100- . export ( Request :: from_parts (
101- metadata,
102- extensions,
103- ExportLogsServiceRequest { resource_logs } ,
104- ) )
105- . await ;
106-
107- match result {
108- Ok ( _) => {
109- otel_debug ! ( name: "TonicLogsClient.ExportSucceeded" ) ;
110- Ok ( ( ) )
127+ RetryErrorType :: Retryable if attempt < policy. max_retries => {
128+ attempt += 1 ;
129+ otel_debug ! ( name: "TonicLogsClient.ExportRetrying" , attempt = attempt, error = format!( "{tonic_status:?}" ) ) ;
130+
131+ // Exponential backoff with jitter
132+ let jitter = ( std:: time:: SystemTime :: now ( )
133+ . duration_since ( std:: time:: SystemTime :: UNIX_EPOCH )
134+ . unwrap ( )
135+ . subsec_nanos ( ) as u64 ) % ( policy. jitter_ms + 1 ) ;
136+ let delay_with_jitter = std:: cmp:: min ( delay + jitter, policy. max_delay_ms ) ;
137+ Tokio . delay ( std:: time:: Duration :: from_millis ( delay_with_jitter) ) . await ;
138+ delay = std:: cmp:: min ( delay * 2 , policy. max_delay_ms ) ;
111139 }
112- Err ( e) => {
113- let error = format ! ( "export error: {e:?}" ) ;
114- otel_debug ! ( name: "TonicLogsClient.ExportFailed" , error = & error) ;
115- Err ( OTelSdkError :: InternalFailure ( error) )
140+ RetryErrorType :: Throttled ( server_delay) if attempt < policy. max_retries => {
141+ attempt += 1 ;
142+ otel_debug ! ( name: "TonicLogsClient.ExportThrottled" , attempt = attempt, delay_ms = server_delay. as_millis( ) ) ;
143+
144+ // Use server-specified delay
145+ Tokio . delay ( server_delay) . await ;
146+ }
147+ _ => {
148+ // Max retries reached
149+ let error = format ! ( "export error after {attempt} attempts: {tonic_status:?}" ) ;
150+ otel_debug ! ( name: "TonicLogsClient.ExportFailedFinal" , error = & error) ;
151+ return Err ( OTelSdkError :: InternalFailure ( error) ) ;
116152 }
117153 }
118- } )
154+ }
119155 }
120- } )
121- . await
156+ }
122157 }
123158
124159 fn shutdown_with_timeout ( & self , _timeout : time:: Duration ) -> OTelSdkResult {
0 commit comments