@@ -14,8 +14,9 @@ use opentelemetry_proto::transform::logs::tonic::group_logs_by_resource_and_scop
1414
1515use super :: BoxInterceptor ;
1616
17- use opentelemetry_sdk:: retry:: { retry_with_exponential_backoff, RetryPolicy } ;
18- use opentelemetry_sdk:: runtime:: Tokio ;
17+ use crate :: retry_classification:: grpc:: classify_tonic_status;
18+ use opentelemetry_sdk:: retry:: { RetryErrorType , RetryPolicy } ;
19+ use opentelemetry_sdk:: runtime:: { Runtime , Tokio } ;
1920
2021pub ( crate ) struct TonicLogsClient {
2122 inner : Mutex < Option < ClientInner > > ,
@@ -71,54 +72,91 @@ impl LogExporter for TonicLogsClient {
7172
7273 let batch = Arc :: new ( batch) ;
7374
74- retry_with_exponential_backoff ( Tokio , policy, "TonicLogsClient.Export" , {
75- let batch = Arc :: clone ( & batch) ;
76- let inner = & self . inner ;
77- let resource = & self . resource ;
78- move || {
79- let batch = Arc :: clone ( & batch) ;
80- Box :: pin ( async move {
81- let ( mut client, metadata, extensions) = match inner. lock ( ) . await . as_mut ( ) {
82- Some ( inner) => {
83- let ( m, e, _) = inner
84- . interceptor
85- . call ( Request :: new ( ( ) ) )
86- . map_err ( |e| {
87- OTelSdkError :: InternalFailure ( format ! ( "error: {e:?}" ) )
88- } ) ?
89- . into_parts ( ) ;
90- ( inner. client . clone ( ) , m, e)
75+ // Custom retry loop that preserves tonic::Status for proper classification
76+ let mut attempt = 0 ;
77+ let mut delay = policy. initial_delay_ms ;
78+
79+ loop {
80+ let batch_clone = Arc :: clone ( & batch) ;
81+
82+ // Execute the export operation
83+ let result = {
84+ let ( mut client, metadata, extensions) = match self . inner . lock ( ) . await . as_mut ( ) {
85+ Some ( inner) => {
86+ let ( m, e, _) = inner
87+ . interceptor
88+ . call ( Request :: new ( ( ) ) )
89+ . map_err ( |e| OTelSdkError :: InternalFailure ( format ! ( "error: {e:?}" ) ) ) ?
90+ . into_parts ( ) ;
91+ ( inner. client . clone ( ) , m, e)
92+ }
93+ None => return Err ( OTelSdkError :: AlreadyShutdown ) ,
94+ } ;
95+
96+ let resource_logs = group_logs_by_resource_and_scope ( & batch_clone, & self . resource ) ;
97+
98+ otel_debug ! ( name: "TonicLogsClient.ExportStarted" ) ;
99+
100+ client
101+ . export ( Request :: from_parts (
102+ metadata,
103+ extensions,
104+ ExportLogsServiceRequest { resource_logs } ,
105+ ) )
106+ . await
107+ } ;
108+
109+ match result {
110+ Ok ( _) => {
111+ otel_debug ! ( name: "TonicLogsClient.ExportSucceeded" ) ;
112+ return Ok ( ( ) ) ;
113+ }
114+ Err ( tonic_status) => {
115+ // ✅ PROPER STRUCTURED ERROR HANDLING
116+ // Classify the tonic::Status directly with structured data
117+ let error_classification = classify_tonic_status ( & tonic_status) ;
118+
119+ match error_classification {
120+ RetryErrorType :: NonRetryable => {
121+ let error = format ! ( "export error: {tonic_status:?}" ) ;
122+ otel_debug ! ( name: "TonicLogsClient.ExportFailed" , error = & error) ;
123+ return Err ( OTelSdkError :: InternalFailure ( error) ) ;
91124 }
92- None => return Err ( OTelSdkError :: AlreadyShutdown ) ,
93- } ;
94-
95- let resource_logs = group_logs_by_resource_and_scope ( & batch, resource) ;
96-
97- otel_debug ! ( name: "TonicLogsClient.ExportStarted" ) ;
98-
99- let result = client
100- . export ( Request :: from_parts (
101- metadata,
102- extensions,
103- ExportLogsServiceRequest { resource_logs } ,
104- ) )
105- . await ;
125+ RetryErrorType :: Retryable if attempt < policy. max_retries => {
126+ attempt += 1 ;
127+ otel_debug ! ( name: "TonicLogsClient.ExportRetrying" , attempt = attempt, error = format!( "{tonic_status:?}" ) ) ;
128+
129+ // Exponential backoff with jitter
130+ let jitter = ( std:: time:: SystemTime :: now ( )
131+ . duration_since ( std:: time:: SystemTime :: UNIX_EPOCH )
132+ . unwrap ( )
133+ . subsec_nanos ( ) as u64 )
134+ % ( policy. jitter_ms + 1 ) ;
135+ let delay_with_jitter =
136+ std:: cmp:: min ( delay + jitter, policy. max_delay_ms ) ;
137+ Tokio
138+ . delay ( std:: time:: Duration :: from_millis ( delay_with_jitter) )
139+ . await ;
140+ delay = std:: cmp:: min ( delay * 2 , policy. max_delay_ms ) ;
141+ }
142+ RetryErrorType :: Throttled ( server_delay) if attempt < policy. max_retries => {
143+ attempt += 1 ;
144+ otel_debug ! ( name: "TonicLogsClient.ExportThrottled" , attempt = attempt, delay_ms = server_delay. as_millis( ) ) ;
106145
107- match result {
108- Ok ( _) => {
109- otel_debug ! ( name: "TonicLogsClient.ExportSucceeded" ) ;
110- Ok ( ( ) )
146+ // Use server-specified delay
147+ Tokio . delay ( server_delay) . await ;
111148 }
112- Err ( e) => {
113- let error = format ! ( "export error: {e:?}" ) ;
114- otel_debug ! ( name: "TonicLogsClient.ExportFailed" , error = & error) ;
115- Err ( OTelSdkError :: InternalFailure ( error) )
149+ _ => {
150+ // Max retries reached
151+ let error =
152+ format ! ( "export error after {attempt} attempts: {tonic_status:?}" ) ;
153+ otel_debug ! ( name: "TonicLogsClient.ExportFailedFinal" , error = & error) ;
154+ return Err ( OTelSdkError :: InternalFailure ( error) ) ;
116155 }
117156 }
118- } )
157+ }
119158 }
120- } )
121- . await
159+ }
122160 }
123161
124162 fn shutdown_with_timeout ( & self , _timeout : time:: Duration ) -> OTelSdkResult {
0 commit comments