
Commit df412fe

chore: OTLP retry post-review cleanup (#3204)
Co-authored-by: Cijo Thomas <[email protected]>
Parent: 796ac1a

2 files changed: +32 −30 lines

opentelemetry-otlp/CHANGELOG.md (1 addition, 0 deletions)

@@ -12,6 +12,7 @@ Released 2025-Sep-25
 - Update `opentelemetry-proto` and `opentelemetry-http` dependency version to 0.31.0
 - Add HTTP compression support with `gzip-http` and `zstd-http` feature flags
 - Add retry with exponential backoff and throttling support for HTTP and gRPC exporters
+  This behaviour is opt-in via the `experimental-grpc-retry` and `experimental-http-retry` feature flags on this crate. You can customize the retry policy using the `with_retry_policy` method on the exporter builders.
 
 ## 0.30.0
 
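For orientation, here is a minimal sketch of what that opt-in configuration might look like from user code. It assumes the crate is built with the `experimental-grpc-retry` feature and that `RetryPolicy` is reachable from the crate root; `max_retries`, `max_delay_ms`, and `jitter_ms` appear in the retry.rs diff below, while `initial_delay_ms` is an assumed name for the remaining field.

```rust
use opentelemetry_otlp::{RetryPolicy, SpanExporter, WithExportConfig};

fn build_exporter() -> Result<SpanExporter, Box<dyn std::error::Error>> {
    // Sketch only: requires the `experimental-grpc-retry` feature flag.
    let exporter = SpanExporter::builder()
        .with_tonic()
        .with_endpoint("http://localhost:4317")
        .with_retry_policy(RetryPolicy {
            max_retries: 3,        // give up after three retries
            initial_delay_ms: 100, // assumed field name for the starting backoff
            max_delay_ms: 1_600,   // delay + jitter is capped at this value
            jitter_ms: 100,        // random jitter added to each delay
        })
        .build()?;
    Ok(exporter)
}
```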

opentelemetry-otlp/src/retry.rs (31 additions, 30 deletions)
@@ -7,6 +7,11 @@
 //! specified retry policy, using exponential backoff and jitter to determine the delay between
 //! retries. The function uses error classification to determine retry behavior and can honor
 //! server-provided throttling hints.
+#[cfg(any(
+    feature = "experimental-grpc-retry",
+    feature = "experimental-http-retry"
+))]
+use opentelemetry::{otel_debug, otel_info};
 
 #[cfg(any(
     feature = "experimental-grpc-retry",
@@ -17,24 +22,23 @@ use opentelemetry::otel_warn;
     feature = "experimental-grpc-retry",
     feature = "experimental-http-retry"
 ))]
-use std::future::Future;
+use opentelemetry_sdk::runtime::Runtime;
 #[cfg(any(
     feature = "experimental-grpc-retry",
     feature = "experimental-http-retry"
 ))]
-use std::hash::{DefaultHasher, Hasher};
-use std::time::Duration;
+use std::future::Future;
 #[cfg(any(
     feature = "experimental-grpc-retry",
     feature = "experimental-http-retry"
 ))]
-use std::time::SystemTime;
-
+use std::hash::{DefaultHasher, Hasher};
+use std::time::Duration;
 #[cfg(any(
     feature = "experimental-grpc-retry",
     feature = "experimental-http-retry"
 ))]
-use opentelemetry_sdk::runtime::Runtime;
+use std::time::SystemTime;
 
 /// Classification of errors for retry purposes.
 #[derive(Debug, Clone, PartialEq)]
@@ -61,26 +65,6 @@ pub struct RetryPolicy {
     pub jitter_ms: u64,
 }
 
-/// A runtime stub for when experimental_async_runtime is not enabled.
-/// This allows retry policy to be configured but no actual retries occur.
-#[cfg(not(any(
-    feature = "experimental-grpc-retry",
-    feature = "experimental-http-retry"
-)))]
-#[derive(Debug, Clone, Default)]
-pub struct NoOpRuntime;
-
-#[cfg(not(any(
-    feature = "experimental-grpc-retry",
-    feature = "experimental-http-retry"
-)))]
-impl NoOpRuntime {
-    /// Creates a new no-op runtime.
-    pub fn new() -> Self {
-        Self
-    }
-}
-
 // Generates a random jitter value up to max_jitter
 #[cfg(any(
     feature = "experimental-grpc-retry",
@@ -144,15 +128,23 @@ where
 
     match error_type {
         RetryErrorType::NonRetryable => {
-            otel_warn!(name: "OtlpRetry", message = format!("Operation {:?} failed with non-retryable error: {:?}", operation_name, err));
+            otel_warn!(name: "Export.Failed.NonRetryable",
+                operation = operation_name,
+                message = "OTLP export failed with non-retryable error - telemetry data will be lost");
             return Err(err);
         }
         RetryErrorType::Retryable if attempt < policy.max_retries => {
             attempt += 1;
             // Use exponential backoff with jitter
-            otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} due to retryable error: {:?}", operation_name, err));
             let jitter = generate_jitter(policy.jitter_ms);
             let delay_with_jitter = std::cmp::min(delay + jitter, policy.max_delay_ms);
+            otel_debug!(name: "Export.InProgress.Retrying",
+                operation = operation_name,
+                attempt = attempt,
+                delay_ms = delay_with_jitter,
+                jitter_ms = jitter,
+                message = "OTLP export failed with retryable error - retrying"
+            );
             runtime
                 .delay(Duration::from_millis(delay_with_jitter))
                 .await;
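The two lines computing `jitter` and `delay_with_jitter` are the heart of the schedule: each retryable failure sleeps for the current backoff plus jitter, capped at `max_delay_ms`. Presumably `delay` itself then doubles for the next attempt (that step sits outside this hunk), which a small sketch makes concrete:

```rust
// Sketch of the delay sequence produced by the loop above, assuming the
// backoff doubles after each retryable failure (the "exponential" part).
// Jitter, omitted here, would be added to each value before the cap.
fn retry_delays(initial_delay_ms: u64, max_delay_ms: u64, max_retries: u32) -> Vec<u64> {
    let mut delays = Vec::new();
    let mut delay = initial_delay_ms;
    for _ in 0..max_retries {
        delays.push(delay.min(max_delay_ms));
        delay = delay.saturating_mul(2); // assumed doubling step
    }
    delays
}

// e.g. retry_delays(100, 1_600, 5) == [100, 200, 400, 800, 1600]
```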
@@ -161,13 +153,22 @@ where
         RetryErrorType::Throttled(server_delay) if attempt < policy.max_retries => {
             attempt += 1;
             // Use server-specified delay (overrides exponential backoff)
-            otel_warn!(name: "OtlpRetry", message = format!("Retrying operation {:?} after server-specified throttling delay: {:?}", operation_name, server_delay));
+            otel_info!(name: "Export.InProgress.Throttled",
+                operation = operation_name,
+                attempt = attempt,
+                delay_ms = server_delay.as_millis(),
+                message = "OTLP export throttled by OTLP endpoint - delaying and retrying"
+            );
             runtime.delay(server_delay).await;
             // Don't update exponential backoff delay for next attempt since server provided specific timing
         }
         _ => {
             // Max retries reached
-            otel_warn!(name: "OtlpRetry", message = format!("Operation {:?} failed after {} attempts: {:?}", operation_name, attempt, err));
+            otel_warn!(name: "Export.Failed.Exhausted",
+                operation = operation_name,
+                retries = attempt,
+                message = "OTLP export exhausted retries - telemetry data will be lost"
+            );
             return Err(err);
         }
     }
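The `Throttled(server_delay)` arm honors an explicit server hint (for example an HTTP `Retry-After` header or a gRPC `RetryInfo` detail) instead of the computed backoff. As a hypothetical illustration of where such a `Duration` could come from, here is one way an HTTP exporter might classify a response; the `RetryErrorType` variants match this file's match arms, but the status mapping is an assumption, not the crate's code:

```rust
use std::time::Duration;

// Mirrors the variants used in retry.rs's match arms.
#[derive(Debug, Clone, PartialEq)]
enum RetryErrorType {
    NonRetryable,
    Retryable,
    Throttled(Duration),
}

// Hypothetical classifier: maps an HTTP status (plus an optional
// Retry-After value in seconds) onto those variants. The real exporters
// classify per protocol; this is only an illustration.
fn classify_http(status: u16, retry_after_secs: Option<u64>) -> RetryErrorType {
    match (status, retry_after_secs) {
        // The server said exactly how long to wait: honor it.
        (429 | 503, Some(secs)) => RetryErrorType::Throttled(Duration::from_secs(secs)),
        // Transient failures: retry with exponential backoff.
        (429 | 500 | 502 | 503 | 504, None) => RetryErrorType::Retryable,
        // Anything else will not be fixed by retrying.
        _ => RetryErrorType::NonRetryable,
    }
}
```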
