Skip to content

Commit c376e07

Browse files
committed
add retry to traces
1 parent 9714d15 commit c376e07

File tree

3 files changed

+57
-33
lines changed

3 files changed

+57
-33
lines changed

opentelemetry-otlp/src/exporter/tonic/logs.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ impl LogExporter for TonicLogsClient {
9292
None => return Err(OTelSdkError::AlreadyShutdown),
9393
};
9494

95-
let resource_logs = group_logs_by_resource_and_scope(&*batch, resource);
95+
let resource_logs = group_logs_by_resource_and_scope(&batch, resource);
9696

9797
otel_debug!(name: "TonicLogsClient.ExportStarted");
9898

opentelemetry-otlp/src/exporter/tonic/trace.rs

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use core::fmt;
2+
use std::sync::Arc;
23
use tokio::sync::Mutex;
34

45
use opentelemetry::otel_debug;
@@ -15,6 +16,9 @@ use tonic::{codegen::CompressionEncoding, service::Interceptor, transport::Chann
1516

1617
use super::BoxInterceptor;
1718

19+
use opentelemetry_sdk::retry::{retry_with_exponential_backoff, RetryPolicy};
20+
use opentelemetry_sdk::runtime::Tokio;
21+
1822
pub(crate) struct TonicTracesClient {
1923
inner: Option<ClientInner>,
2024
#[allow(dead_code)]
@@ -60,43 +64,63 @@ impl TonicTracesClient {
6064

6165
impl SpanExporter for TonicTracesClient {
6266
async fn export(&self, batch: Vec<SpanData>) -> OTelSdkResult {
63-
let (mut client, metadata, extensions) = match &self.inner {
64-
Some(inner) => {
65-
let (m, e, _) = inner
66-
.interceptor
67-
.lock()
68-
.await // tokio::sync::Mutex doesn't return a poisoned error, so we can safely use the interceptor here
69-
.call(Request::new(()))
70-
.map_err(|e| OTelSdkError::InternalFailure(format!("error: {e:?}")))?
71-
.into_parts();
72-
(inner.client.clone(), m, e)
73-
}
74-
None => return Err(OTelSdkError::AlreadyShutdown),
67+
let policy = RetryPolicy {
68+
max_retries: 3,
69+
initial_delay_ms: 100,
70+
max_delay_ms: 1600,
71+
jitter_ms: 100,
7572
};
7673

77-
let resource_spans = group_spans_by_resource_and_scope(batch, &self.resource);
74+
let batch = Arc::new(batch);
7875

79-
otel_debug!(name: "TonicTracesClient.ExportStarted");
76+
retry_with_exponential_backoff(Tokio, policy, "TonicTracesClient.Export", {
77+
let batch = Arc::clone(&batch);
78+
let inner = &self.inner;
79+
let resource = &self.resource;
80+
move || {
81+
let batch = Arc::clone(&batch);
82+
Box::pin(async move {
83+
let (mut client, metadata, extensions) = match inner {
84+
Some(inner) => {
85+
let (m, e, _) = inner
86+
.interceptor
87+
.lock()
88+
.await // tokio::sync::Mutex doesn't return a poisoned error, so we can safely use the interceptor here
89+
.call(Request::new(()))
90+
.map_err(|e| OTelSdkError::InternalFailure(format!("error: {e:?}")))?
91+
.into_parts();
92+
(inner.client.clone(), m, e)
93+
}
94+
None => return Err(OTelSdkError::AlreadyShutdown),
95+
};
8096

81-
let result = client
82-
.export(Request::from_parts(
83-
metadata,
84-
extensions,
85-
ExportTraceServiceRequest { resource_spans },
86-
))
87-
.await;
97+
let resource_spans = group_spans_by_resource_and_scope((*batch).clone(), resource);
8898

89-
match result {
90-
Ok(_) => {
91-
otel_debug!(name: "TonicTracesClient.ExportSucceeded");
92-
Ok(())
93-
}
94-
Err(e) => {
95-
let error = e.to_string();
96-
otel_debug!(name: "TonicTracesClient.ExportFailed", error = &error);
97-
Err(OTelSdkError::InternalFailure(error))
99+
otel_debug!(name: "TonicTracesClient.ExportStarted");
100+
101+
let result = client
102+
.export(Request::from_parts(
103+
metadata,
104+
extensions,
105+
ExportTraceServiceRequest { resource_spans },
106+
))
107+
.await;
108+
109+
match result {
110+
Ok(_) => {
111+
otel_debug!(name: "TonicTracesClient.ExportSucceeded");
112+
Ok(())
113+
}
114+
Err(e) => {
115+
let error = format!("export error: {e:?}");
116+
otel_debug!(name: "TonicTracesClient.ExportFailed", error = &error);
117+
Err(OTelSdkError::InternalFailure(error))
118+
}
119+
}
120+
})
98121
}
99-
}
122+
})
123+
.await
100124
}
101125

102126
fn shutdown(&mut self) -> OTelSdkResult {

opentelemetry-sdk/src/retry.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ pub struct RetryPolicy {
3434
/// A runtime stub for when experimental_async_runtime is not enabled.
3535
/// This allows retry policy to be configured but no actual retries occur.
3636
#[cfg(not(feature = "experimental_async_runtime"))]
37-
#[derive(Debug, Clone)]
37+
#[derive(Debug, Clone, Default)]
3838
pub struct NoOpRuntime;
3939

4040
#[cfg(not(feature = "experimental_async_runtime"))]

0 commit comments

Comments
 (0)