Skip to content

Commit c0d2f62

Browse files
authored
Merge pull request #777 from yeazelm/update_ot
Update Open Telemetry dependencies
2 parents acbf3ab + 64dd229 commit c0d2f62

File tree

12 files changed

+535
-356
lines changed

12 files changed

+535
-356
lines changed

Cargo.lock

Lines changed: 386 additions & 266 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,13 @@ nonzero_ext = "0.3"
4141
# these package versions should be moved in lockstep to match upstream
4242
actix-web = { version = "4.9", features = ["rustls-0_23"] }
4343
tracing-actix-web = "0.7"
44-
actix-web-opentelemetry = { version = "0.18", features = ["metrics", "metrics-prometheus"] }
44+
opentelemetry-instrumentation-actix-web = { version = "0.22", features = ["metrics"] }
4545

4646
# opentelemetry dependencies
47-
opentelemetry = { version = "0.23"}
48-
opentelemetry_sdk = { version = "0.23", features = ["rt-tokio-current-thread"]}
49-
opentelemetry-prometheus = "0.16"
50-
prometheus = "0.13.0"
47+
opentelemetry = { version = "0.29", features = ["metrics"]}
48+
opentelemetry_sdk = { version = "0.29", features = ["rt-tokio-current-thread", "metrics"]}
49+
opentelemetry-prometheus = "0.29"
50+
prometheus = "0.14"
5151

5252
# k8s-openapi must match the version required by kube and enable a k8s version feature
5353
k8s-openapi = { version = "0.21", default-features = false, features = ["v1_24"] }

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ DESTDIR ?= .
2929
# tarball.
3030
DISTFILE ?= $(DESTDIR:/=)/$(subst /,_,$(IMAGE_NAME)).tar.gz
3131

32-
BOTTLEROCKET_SDK_VERSION = v0.61.0
32+
BOTTLEROCKET_SDK_VERSION = v0.62.0
3333

3434
# Tools used during the chart release lifecycle
3535
export KUBECONFORM_VERSION = v0.6.3

apiserver/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ models = { path = "../models", version = "0.1.0" }
1818

1919
actix-web = { workspace = true }
2020
awc = { workspace = true }
21-
actix-web-opentelemetry = { workspace = true }
21+
opentelemetry-instrumentation-actix-web = { workspace = true }
2222
rustls = { workspace = true }
2323
rustls-pemfile = { workspace = true }
2424
webpki = { workspace = true }

apiserver/src/api/mod.rs

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,14 @@ pub mod error;
55
mod node;
66
mod ping;
77

8-
use crate::{
9-
auth::{K8STokenAuthorizor, K8STokenReviewer, TokenAuthMiddleware},
10-
constants::{
11-
CRD_CONVERT_ENDPOINT, EXCLUDE_NODE_FROM_LB_ENDPOINT, HEADER_BRUPOP_K8S_AUTH_TOKEN,
12-
HEADER_BRUPOP_NODE_NAME, HEADER_BRUPOP_NODE_UID, NODE_CORDON_AND_DRAIN_ENDPOINT,
13-
NODE_RESOURCE_ENDPOINT, NODE_UNCORDON_ENDPOINT, REMOVE_NODE_EXCLUSION_TO_LB_ENDPOINT,
14-
},
15-
telemetry,
16-
};
17-
use models::constants::{
18-
AGENT, APISERVER_HEALTH_CHECK_ROUTE, APISERVER_SERVICE_NAME, LABEL_COMPONENT, PRIVATE_KEY_NAME,
19-
PUBLIC_KEY_NAME, TLS_KEY_MOUNT_PATH,
20-
};
21-
use models::node::{read_certificate, BottlerocketShadowClient, BottlerocketShadowSelector};
8+
use std::{env, fs::File, io::BufReader};
229

2310
use actix_web::{
2411
dev::ServerHandle,
2512
http::header::HeaderMap,
2613
web::{self, Data},
2714
App, HttpServer,
2815
};
29-
use actix_web_opentelemetry::{PrometheusMetricsHandler, RequestMetrics, RequestTracing};
3016
use futures::StreamExt;
3117
use k8s_openapi::api::core::v1::Pod;
3218
use kube::{
@@ -38,11 +24,25 @@ use kube::{
3824
},
3925
ResourceExt,
4026
};
41-
27+
use opentelemetry_instrumentation_actix_web::{RequestMetrics, RequestTracing};
4228
use rustls::ServerConfig;
4329
use rustls_pemfile::{certs, pkcs8_private_keys};
4430
use snafu::{OptionExt, ResultExt};
45-
use std::{env, fs::File, io::BufReader};
31+
32+
use crate::{
33+
auth::{K8STokenAuthorizor, K8STokenReviewer, TokenAuthMiddleware},
34+
constants::{
35+
CRD_CONVERT_ENDPOINT, EXCLUDE_NODE_FROM_LB_ENDPOINT, HEADER_BRUPOP_K8S_AUTH_TOKEN,
36+
HEADER_BRUPOP_NODE_NAME, HEADER_BRUPOP_NODE_UID, NODE_CORDON_AND_DRAIN_ENDPOINT,
37+
NODE_RESOURCE_ENDPOINT, NODE_UNCORDON_ENDPOINT, REMOVE_NODE_EXCLUSION_TO_LB_ENDPOINT,
38+
},
39+
telemetry,
40+
};
41+
use models::constants::{
42+
AGENT, APISERVER_HEALTH_CHECK_ROUTE, APISERVER_SERVICE_NAME, LABEL_COMPONENT, PRIVATE_KEY_NAME,
43+
PUBLIC_KEY_NAME, TLS_KEY_MOUNT_PATH,
44+
};
45+
use models::node::{read_certificate, BottlerocketShadowClient, BottlerocketShadowSelector};
4646
use tokio::time::{sleep, Duration};
4747
use tracing::{event, Level};
4848
use tracing_actix_web::TracingLogger;
@@ -217,12 +217,10 @@ pub async fn run_server<T: 'static + BottlerocketShadowClient>(
217217
)
218218
.wrap(RequestTracing::new())
219219
.wrap(RequestMetrics::default())
220-
.route(
221-
"/metrics",
222-
web::get().to(PrometheusMetricsHandler::new(prometheus_registry.clone())),
223-
)
220+
.route("/metrics", web::get().to(crate::telemetry::vending_metrics))
224221
.wrap(TracingLogger::<telemetry::BrupopApiserverRootSpanBuilder>::new())
225222
.app_data(Data::new(settings.clone()))
223+
.app_data(Data::new(prometheus_registry.clone()))
226224
.service(
227225
web::resource(NODE_RESOURCE_ENDPOINT)
228226
.route(web::post().to(node::create_bottlerocket_shadow_resource::<T>))

apiserver/src/main.rs

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,43 @@
1-
use apiserver::api::{self, APIServerSettings};
2-
use apiserver_error::{StartServerSnafu, StartTelemetrySnafu};
3-
use models::node::K8SBottlerocketShadowClient;
4-
use models::telemetry;
5-
use opentelemetry::global;
6-
use tracing::{event, Level};
1+
use std::convert::TryFrom;
2+
use std::env;
3+
use std::fs;
4+
use std::sync::OnceLock;
75

6+
use opentelemetry::global;
87
use opentelemetry::KeyValue;
98
use opentelemetry_sdk::metrics::SdkMeterProvider;
109
use opentelemetry_sdk::Resource;
1110
use snafu::ResultExt;
11+
use tracing::{event, Level};
1212

13-
use std::convert::TryFrom;
14-
use std::env;
15-
use std::fs;
13+
use apiserver::api::{self, APIServerSettings};
14+
use apiserver_error::{StartServerSnafu, StartTelemetrySnafu};
15+
use models::node::K8SBottlerocketShadowClient;
16+
use models::telemetry;
1617

1718
// By default, errors resulting in termination of the apiserver are written to this file,
1819
// which is the location kubernetes uses by default to surface termination-causing errors.
1920
const TERMINATION_LOG: &str = "/dev/termination-log";
2021
const APISERVER_INTERNAL_PORT_ENV_VAR: &str = "APISERVER_INTERNAL_PORT";
2122

23+
// Store the meter provider so we can shut it down properly
24+
static METER_PROVIDER: OnceLock<SdkMeterProvider> = OnceLock::new();
25+
26+
/// Custom error handler for OpenTelemetry operations
27+
fn handle_opentelemetry_error(operation: &str, error: impl std::fmt::Display) {
28+
tracing::error!("OpenTelemetry {} error: {}", operation, error);
29+
}
30+
2231
#[actix_web::main]
2332
async fn main() {
2433
main_inner().await;
2534

26-
opentelemetry::global::shutdown_tracer_provider();
35+
// Properly shutdown the OpenTelemetry meter provider
36+
if let Some(provider) = METER_PROVIDER.get() {
37+
if let Err(e) = provider.shutdown() {
38+
handle_opentelemetry_error("shutdown", e);
39+
}
40+
}
2741
}
2842

2943
async fn main_inner() {
@@ -56,8 +70,17 @@ async fn run_server() -> Result<(), apiserver_error::Error> {
5670

5771
let prometheus_provider = SdkMeterProvider::builder()
5872
.with_reader(prometheus_exporter)
59-
.with_resource(Resource::new([KeyValue::new("service.name", "apiserver")]))
73+
.with_resource(
74+
Resource::builder()
75+
.with_attribute(KeyValue::new("service.name", "apiserver"))
76+
.build(),
77+
)
6078
.build();
79+
80+
// Store the provider for shutdown
81+
METER_PROVIDER
82+
.set(prometheus_provider.clone())
83+
.expect("Failed to set meter provider");
6184
global::set_meter_provider(prometheus_provider);
6285

6386
let incluster_config =
@@ -130,7 +153,7 @@ pub mod apiserver_error {
130153

131154
#[snafu(display("Error creating prometheus registry: '{}'", source))]
132155
PrometheusRegsitry {
133-
source: opentelemetry::metrics::MetricsError,
156+
source: opentelemetry_sdk::metrics::MetricError,
134157
},
135158
}
136159
}

apiserver/src/telemetry.rs

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
use crate::api::NO_TELEMETRY_ENDPOINTS;
2-
use crate::constants::HEADER_BRUPOP_NODE_NAME;
1+
use std::collections::HashSet;
32

43
use actix_web::body::MessageBody;
54
use actix_web::dev::{ServiceRequest, ServiceResponse};
5+
use actix_web::{http::header::ContentType, web::Data, HttpResponse};
66
use lazy_static::lazy_static;
7+
use prometheus::{Encoder, TextEncoder};
78
use tracing::Span;
89
use tracing_actix_web::{DefaultRootSpanBuilder, RootSpanBuilder};
910

10-
use std::collections::HashSet;
11+
use crate::api::NO_TELEMETRY_ENDPOINTS;
12+
use crate::constants::HEADER_BRUPOP_NODE_NAME;
1113

1214
// tracing-actix-web doesn't provide a convenient way to remove any routes from the logs, so we use a global
1315
// settings containing API paths to generate empty `tracing::Span`s on paths which we don't want logged.
@@ -48,3 +50,30 @@ impl RootSpanBuilder for BrupopApiserverRootSpanBuilder {
4850
DefaultRootSpanBuilder::on_request_end(span, response);
4951
}
5052
}
53+
54+
/// Custom error handler for OpenTelemetry metrics encoding errors
55+
fn handle_metrics_error(err: prometheus::Error) {
56+
tracing::error!("Metrics encoding error: {}", err);
57+
}
58+
59+
pub async fn vending_metrics(registry: Data<prometheus::Registry>) -> HttpResponse {
60+
let encoder = TextEncoder::new();
61+
let metric_families = registry.gather();
62+
let mut buf = Vec::new();
63+
64+
match encoder.encode(&metric_families[..], &mut buf) {
65+
Ok(()) => {
66+
let body = String::from_utf8(buf).unwrap_or_default();
67+
HttpResponse::Ok()
68+
.insert_header(ContentType::plaintext())
69+
.body(body)
70+
}
71+
Err(err) => {
72+
handle_metrics_error(err);
73+
// Return empty metrics response on error
74+
HttpResponse::InternalServerError()
75+
.insert_header(ContentType::plaintext())
76+
.body("# Metrics encoding error\n")
77+
}
78+
}
79+
}

controller/src/controller.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,11 +439,11 @@ fn find_unlabeled_nodes(mut nodes: Vec<Node>) -> Vec<String> {
439439

440440
#[instrument(skip(node))]
441441
fn node_has_label(node: &Node) -> bool {
442-
return node.labels().get_key_value(LABEL_BRUPOP_INTERFACE_NAME)
442+
node.labels().get_key_value(LABEL_BRUPOP_INTERFACE_NAME)
443443
== Some((
444444
&LABEL_BRUPOP_INTERFACE_NAME.to_string(),
445445
&BRUPOP_INTERFACE_VERSION.to_string(),
446-
));
446+
))
447447
}
448448

449449
fn read_env_var(env_var: &str) -> Result<String> {

controller/src/main.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
use std::{convert::TryFrom, env};
22

3-
use controller::{telemetry::vending_metrics, BrupopController};
4-
use models::{
5-
constants::CONTROLLER_INTERNAL_PORT,
6-
node::{BottlerocketShadow, K8SBottlerocketShadowClient},
7-
telemetry,
8-
};
9-
103
use actix_web::{web::Data, App, HttpServer};
11-
124
use futures::StreamExt;
135
use k8s_openapi::api::core::v1::Node;
146
use kube::{
@@ -20,11 +12,17 @@ use kube::{
2012
},
2113
ResourceExt,
2214
};
23-
2415
use opentelemetry_sdk::metrics::SdkMeterProvider;
2516
use snafu::ResultExt;
2617
use tracing::{event, Level};
2718

19+
use controller::{telemetry::vending_metrics, BrupopController};
20+
use models::{
21+
constants::CONTROLLER_INTERNAL_PORT,
22+
node::{BottlerocketShadow, K8SBottlerocketShadowClient},
23+
telemetry,
24+
};
25+
2826
/// The module-wide result type.
2927
type Result<T> = std::result::Result<T, controller_error::Error>;
3028

@@ -192,7 +190,7 @@ pub mod controller_error {
192190

193191
#[snafu(display("Error creating prometheus registry: '{}'", source))]
194192
PrometheusRegsitry {
195-
source: opentelemetry::metrics::MetricsError,
193+
source: opentelemetry_sdk::metrics::MetricError,
196194
},
197195
}
198196
}

controller/src/metrics.rs

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
use models::node::BottlerocketShadow;
2-
use opentelemetry::{metrics::Meter, Key};
3-
use snafu::ResultExt;
41
use std::collections::HashMap;
52
use std::sync::{Arc, Mutex};
3+
4+
use opentelemetry::{metrics::Meter, Key, KeyValue};
5+
use snafu::ResultExt;
66
use tracing::instrument;
77

8+
use models::node::BottlerocketShadow;
9+
810
const HOST_VERSION_KEY: Key = Key::from_static_str("bottlerocket_version");
911
const HOST_STATE_KEY: Key = Key::from_static_str("state");
1012

@@ -69,34 +71,31 @@ impl BrupopControllerMetrics {
6971
let hosts_data_clone_for_version = Arc::clone(&brupop_shared_hosts_data);
7072
let hosts_data_clone_for_state = Arc::clone(&brupop_shared_hosts_data);
7173

72-
// Observer for cluster host's bottlerocket version
73-
let brupop_hosts_version_observer = meter
74+
// Create observable gauges with callbacks using OpenTelemetry API
75+
let _brupop_hosts_version_observer = meter
7476
.u64_observable_gauge("brupop_hosts_version")
7577
.with_description("Brupop host's bottlerocket version")
76-
.init();
78+
.with_callback(move |observer| {
79+
let data = hosts_data_clone_for_version.lock().unwrap();
80+
for (host_version, count) in &data.hosts_version_count {
81+
let attributes = [KeyValue::new(HOST_VERSION_KEY, host_version.to_string())];
82+
observer.observe(*count, &attributes);
83+
}
84+
})
85+
.build();
7786

7887
// Observer for cluster host's brupop state
79-
let brupop_hosts_state_observer = meter
88+
let _brupop_hosts_state_observer = meter
8089
.u64_observable_gauge("brupop_hosts_state")
8190
.with_description("Brupop host's state")
82-
.init();
83-
84-
let _ = meter.register_callback(&[brupop_hosts_version_observer.as_any()], move |cx| {
85-
let data = hosts_data_clone_for_version.lock().unwrap();
86-
for (host_version, count) in &data.hosts_version_count {
87-
let labels = vec![HOST_VERSION_KEY.string(host_version.to_string())];
88-
cx.observe_u64(&brupop_hosts_version_observer, *count, &labels);
89-
}
90-
});
91-
92-
let _ = meter.register_callback(&[brupop_hosts_state_observer.as_any()], move |cx| {
93-
let data = hosts_data_clone_for_state.lock().unwrap();
94-
for (host_state, count) in &data.hosts_state_count {
95-
let labels = vec![HOST_STATE_KEY.string(host_state.to_string())];
96-
cx.observe_u64(&brupop_hosts_state_observer, *count, &labels);
97-
}
98-
});
99-
91+
.with_callback(move |observer| {
92+
let data = hosts_data_clone_for_state.lock().unwrap();
93+
for (host_state, count) in &data.hosts_state_count {
94+
let attributes = [KeyValue::new(HOST_STATE_KEY, host_state.to_string())];
95+
observer.observe(*count, &attributes);
96+
}
97+
})
98+
.build();
10099
BrupopControllerMetrics {
101100
brupop_shared_hosts_data,
102101
}

0 commit comments

Comments
 (0)