Skip to content

Commit c8982f7

Browse files
Plat 330/deliver metrics to otel (#36)
1 parent 2f6d261 commit c8982f7

File tree

8 files changed: +114 additions, -46 deletions (lines changed)

Cargo.lock

Lines changed: 0 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,6 @@ opentelemetry_sdk = { version = "0.31", default-features = false, features = [
4949
"logs",
5050
"metrics",
5151
] }
52-
opentelemetry-stdout = { version = "0.31", default-features = false, features = [
53-
"logs",
54-
"metrics",
55-
] }
5652
prometheus = "0.14"
5753
s3s = "0.13.0-alpha.3"
5854
s3s-aws = "0.13.0-alpha.3"

dev/Dockerfile.s3-cache

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ WORKDIR /app
55
COPY Cargo.toml Cargo.lock ./
66
COPY src/ src/
77

8-
RUN cargo build --release --bin s3_cache
8+
RUN cargo build --bin s3_cache
99

1010
FROM ubuntu:24.04
1111

@@ -14,7 +14,7 @@ RUN apt-get update && apt-get install -y \
1414
curl \
1515
&& rm -rf /var/lib/apt/lists/*
1616

17-
COPY --from=builder /app/target/release/s3_cache /usr/local/bin/s3_cache
17+
COPY --from=builder /app/target/debug/s3_cache /usr/local/bin/s3_cache
1818

1919
RUN mkdir -p /metrics
2020

dev/docker-compose.yml

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,17 +33,37 @@ services:
3333
CACHE_MAX_SIZE_BYTES: "1073741824"
3434
CACHE_MAX_OBJECT_SIZE_BYTES: "10485760"
3535
CACHE_TTL_SECONDS: "86400"
36-
PROMETHEUS_TEXTFILE_DIR: /metrics
36+
OTEL_GRPC_ENDPOINT_URL: "http://otel-collector:4317"
37+
OTEL_EXPORT_METRICS: "true"
3738
RUST_LOG: info
3839
depends_on:
3940
minio:
4041
condition: service_healthy
42+
otel-collector:
43+
condition: service_started
4144
healthcheck:
4245
test: [ "CMD", "curl", "-f", "http://localhost:8080/health" ]
4346
interval: 5s
4447
timeout: 3s
4548
retries: 10
4649

50+
otel-collector:
51+
image: otel/opentelemetry-collector-contrib:0.147.0
52+
volumes:
53+
- ./otel-collector-config.yml:/etc/otelcol-contrib/config.yaml:ro
54+
ports:
55+
- "127.0.0.1:4317:4317"
56+
- "127.0.0.1:8889:8889"
57+
58+
prometheus:
59+
image: prom/prometheus:v3.10.0
60+
volumes:
61+
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
62+
ports:
63+
- "127.0.0.1:9090:9090"
64+
depends_on:
65+
- otel-collector
66+
4767
test-runner:
4868
image: amazon/aws-cli
4969
entrypoint: [ "/bin/bash", "/scripts/test.sh" ]

src/config.rs

Lines changed: 51 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,14 @@ pub struct Config {
8080
#[arg(long, env = "OTEL_GRPC_ENDPOINT_URL")]
8181
pub otel_grpc_endpoint_url: Option<String>,
8282

83+
/// Export metrics via OTLP gRPC (requires otel_grpc_endpoint_url)
84+
#[arg(long, env = "OTEL_EXPORT_METRICS", default_value_t = false, action = clap::ArgAction::Set)]
85+
pub otel_export_metrics: bool,
86+
87+
/// Export logs via OTLP gRPC (requires otel_grpc_endpoint_url)
88+
#[arg(long, env = "OTEL_EXPORT_LOGS", default_value_t = false, action = clap::ArgAction::Set)]
89+
pub otel_export_logs: bool,
90+
8391
/// Prometheus textfile collector directory
8492
#[arg(long, env = "PROMETHEUS_TEXTFILE_DIR")]
8593
pub prometheus_textfile_dir: Option<String>,
@@ -114,6 +122,14 @@ impl Config {
114122
if self.worker_threads == 0 {
115123
panic!("Invalid configuration: worker_threads must be greater than 0");
116124
}
125+
126+
if (self.otel_export_metrics || self.otel_export_logs)
127+
&& self.otel_grpc_endpoint_url.is_none()
128+
{
129+
panic!(
130+
"Invalid configuration: otel_export_metrics and otel_export_logs require otel_grpc_endpoint_url to be set"
131+
);
132+
}
117133
}
118134
}
119135

@@ -123,7 +139,8 @@ impl Display for Config {
123139
f,
124140
"Config{{ listen_addr: {}, upstream_endpoint: {}, upstream_region: {}, \
125141
cache_max_entries: {}, cache_max_size_bytes: {}, cache_ttl_seconds: {}, \
126-
max_cacheable_object_size: {}, otel_grpc_endpoint_url: {:?}, cache_shards: {}, \
142+
max_cacheable_object_size: {}, otel_grpc_endpoint_url: {:?}, \
143+
otel_export_metrics: {}, otel_export_logs: {}, cache_shards: {}, \
127144
cache_dry_run: {}, worker_threads: {}, prometheus_textfile_dir: {:?} }}",
128145
self.listen_addr,
129146
self.upstream_endpoint,
@@ -133,6 +150,8 @@ impl Display for Config {
133150
self.cache_ttl_seconds,
134151
self.cache_max_object_size_bytes,
135152
self.otel_grpc_endpoint_url,
153+
self.otel_export_metrics,
154+
self.otel_export_logs,
136155
self.cache_shards,
137156
self.cache_dry_run,
138157
self.worker_threads,
@@ -165,6 +184,8 @@ mod tests {
165184
cache_ttl_seconds: 86_400,
166185
worker_threads: 4,
167186
otel_grpc_endpoint_url: None,
187+
otel_export_metrics: false,
188+
otel_export_logs: false,
168189
prometheus_textfile_dir: None,
169190
}
170191
}
@@ -210,4 +231,33 @@ mod tests {
210231
config.worker_threads = 0;
211232
config.validate();
212233
}
234+
235+
#[test]
236+
#[should_panic(
237+
expected = "otel_export_metrics and otel_export_logs require otel_grpc_endpoint_url"
238+
)]
239+
fn config_otel_export_metrics_without_endpoint() {
240+
let mut config = minimal_config();
241+
config.otel_export_metrics = true;
242+
config.validate();
243+
}
244+
245+
#[test]
246+
#[should_panic(
247+
expected = "otel_export_metrics and otel_export_logs require otel_grpc_endpoint_url"
248+
)]
249+
fn config_otel_export_logs_without_endpoint() {
250+
let mut config = minimal_config();
251+
config.otel_export_logs = true;
252+
config.validate();
253+
}
254+
255+
#[test]
256+
fn config_otel_export_with_endpoint() {
257+
let mut config = minimal_config();
258+
config.otel_grpc_endpoint_url = Some("http://localhost:4317".to_string());
259+
config.otel_export_metrics = true;
260+
config.otel_export_logs = true;
261+
config.validate();
262+
}
213263
}

src/telemetry.rs

Lines changed: 36 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -84,12 +84,16 @@ const EVICTION_AGE_BUCKETS: &[f64] = &[
8484
pub(crate) fn initialize_telemetry(
8585
config: &Config,
8686
) -> crate::Result<(
87-
opentelemetry_sdk::metrics::SdkMeterProvider,
87+
Option<opentelemetry_sdk::metrics::SdkMeterProvider>,
8888
Option<opentelemetry_sdk::logs::SdkLoggerProvider>,
8989
)> {
9090
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
9191

92-
let logs_provider = init_logs(config.otel_grpc_endpoint_url.as_deref())?;
92+
let otel_logs_endpoint = config
93+
.otel_grpc_endpoint_url
94+
.as_deref()
95+
.filter(|_| config.otel_export_logs);
96+
let logs_provider = init_logs(otel_logs_endpoint)?;
9397

9498
match logs_provider.as_ref() {
9599
None => {
@@ -109,7 +113,11 @@ pub(crate) fn initialize_telemetry(
109113
}
110114
}
111115

112-
let metrics_provider = init_metrics(config.otel_grpc_endpoint_url.as_deref())?;
116+
let otel_metrics_endpoint = config
117+
.otel_grpc_endpoint_url
118+
.as_deref()
119+
.filter(|_| config.otel_export_metrics);
120+
let metrics_provider = init_metrics(otel_metrics_endpoint)?;
113121

114122
Ok((metrics_provider, logs_provider))
115123
}
@@ -147,35 +155,37 @@ pub(crate) fn shutdown_logs(logs_provider: Option<opentelemetry_sdk::logs::SdkLo
147155

148156
fn init_metrics(
149157
otel_grpc_endpoint_url: Option<&str>,
150-
) -> crate::Result<opentelemetry_sdk::metrics::SdkMeterProvider> {
151-
let builder =
152-
opentelemetry_sdk::metrics::SdkMeterProvider::builder().with_resource(RESOURCE.clone());
158+
) -> crate::Result<Option<opentelemetry_sdk::metrics::SdkMeterProvider>> {
159+
let Some(otel_grpc_endpoint_url) = otel_grpc_endpoint_url else {
160+
info!("OTLP metrics export disabled");
161+
return Ok(None);
162+
};
153163

154-
let provider = match otel_grpc_endpoint_url {
155-
None => {
156-
info!("opentelemetry_stdout initialized");
157-
builder.with_periodic_exporter(opentelemetry_stdout::MetricExporter::default())
158-
}
159-
Some(otel_grpc_endpoint_url) => {
160-
info!("opentelemetry_otlp initialized");
161-
let otlp_exporter = opentelemetry_otlp::MetricExporter::builder()
162-
.with_tonic()
163-
.with_compression(Compression::Gzip)
164-
.with_endpoint(otel_grpc_endpoint_url)
165-
.with_timeout(Duration::from_secs(5))
166-
.build()?;
167-
168-
builder.with_periodic_exporter(otlp_exporter)
169-
}
170-
}
171-
.build();
164+
info!("OTLP metrics export enabled (endpoint: {otel_grpc_endpoint_url})");
165+
let otlp_exporter = opentelemetry_otlp::MetricExporter::builder()
166+
.with_tonic()
167+
.with_compression(Compression::Gzip)
168+
.with_endpoint(otel_grpc_endpoint_url)
169+
.with_timeout(Duration::from_secs(5))
170+
.build()?;
171+
172+
let provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder()
173+
.with_resource(RESOURCE.clone())
174+
.with_periodic_exporter(otlp_exporter)
175+
.build();
172176

173177
opentelemetry::global::set_meter_provider(provider.clone());
174178

175-
Ok(provider)
179+
Ok(Some(provider))
176180
}
177181

178-
pub(crate) fn shutdown_metrics(metric_provider: opentelemetry_sdk::metrics::SdkMeterProvider) {
182+
pub(crate) fn shutdown_metrics(
183+
metric_provider: Option<opentelemetry_sdk::metrics::SdkMeterProvider>,
184+
) {
185+
let Some(metric_provider) = metric_provider else {
186+
return;
187+
};
188+
179189
if let Err(error) = metric_provider.shutdown() {
180190
error!("Error during metric shutdown: {error:?}");
181191
}

tests/common/docker.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,8 @@ pub async fn start_proxy(minio_api_port: u16) -> TestProxy {
9090
cache_ttl_seconds: 86_400,
9191
worker_threads: 4,
9292
otel_grpc_endpoint_url: None, // no network side effects in tests
93+
otel_export_logs: false,
94+
otel_export_metrics: false,
9395
prometheus_textfile_dir: None, // no filesystem side effects in tests
9496
};
9597

tests/integration_health.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,8 @@ async fn start_test_server_with_upstream(
2828
cache_ttl_seconds: 60,
2929
worker_threads: 2,
3030
otel_grpc_endpoint_url: None,
31+
otel_export_metrics: false,
32+
otel_export_logs: false,
3133
prometheus_textfile_dir: None,
3234
};
3335

0 commit comments

Comments
 (0)