Skip to content

Commit 61c8068

Browse files
committed
Merge branch 'main' into feature/file-logging
2 parents 8d9b190 + f949739 commit 61c8068

File tree

13 files changed

+381
-454
lines changed

13 files changed

+381
-454
lines changed

Cargo.lock

Lines changed: 102 additions & 334 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,12 @@ k8s-openapi = { version = "0.22.0", default-features = false, features = ["schem
3232
# We use rustls instead of openssl for easier portablitly, e.g. so that we can build stackablectl without the need to vendor (build from source) openssl
3333
kube = { version = "0.92.0", default-features = false, features = ["client", "jsonpatch", "runtime", "derive", "rustls-tls"] }
3434
lazy_static = "1.4.0"
35-
opentelemetry = "0.22.0"
36-
opentelemetry_sdk = { version = "0.22.1", features = ["rt-tokio"] }
37-
opentelemetry-appender-tracing = "0.3.0"
38-
opentelemetry-jaeger = { version = "0.21.0", features = ["rt-tokio"] }
39-
opentelemetry-otlp = "0.15.0"
40-
opentelemetry-semantic-conventions = "0.14.0"
35+
opentelemetry = "0.23.0"
36+
opentelemetry_sdk = { version = "0.23.0", features = ["rt-tokio"] }
37+
opentelemetry-appender-tracing = "0.4.0"
38+
opentelemetry-jaeger = { version = "0.22.0", features = ["rt-tokio"] }
39+
opentelemetry-otlp = "0.16.0"
40+
opentelemetry-semantic-conventions = "0.15.0"
4141
p256 = { version = "0.13.2", features = ["ecdsa"] }
4242
pin-project = "1.1.5"
4343
proc-macro2 = "1.0.83"
@@ -67,7 +67,7 @@ tokio-test = "0.4.4"
6767
tower = "0.4.13"
6868
tower-http = { version = "0.5.2", features = ["trace"] }
6969
tracing = "0.1.40"
70-
tracing-opentelemetry = "0.23.0"
70+
tracing-opentelemetry = "0.24.0"
7171
tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] }
7272
url = { version = "2.5.0", features = ["serde"] }
7373
x509-cert = { version = "0.2.5", features = ["builder"] }

crates/stackable-operator/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.
66

77
### Added
88

9+
- Added `ProductImage::product_version` utility function ([#817], [#818])
910
- Added support for logging to files ([#814]).
1011

1112
### Changed
@@ -16,15 +17,19 @@ All notable changes to this project will be documented in this file.
1617
- [Blog Post - Breaking Change](https://kube.rs/blog/2024/06/11/watcher-memory-improvements/#breaking-change)
1718
- [kube#1494](https://github.com/kube-rs/kube/pull/1494)
1819
- [kube#1504](https://github.com/kube-rs/kube/pull/1504)
20+
- Upgrade opentelemetry crates ([#811]).
1921
- Changed OPA Bundle Builder Vector config to read from the new log-to-file setup ([#814]).
2022

2123
### Fixed
2224

2325
- Product image selection pull request version override now only applies to pull requests ([#812]).
2426

2527
[#804]: https://github.com/stackabletech/operator-rs/pull/804
28+
[#811]: https://github.com/stackabletech/operator-rs/pull/811
2629
[#812]: https://github.com/stackabletech/operator-rs/pull/812
2730
[#814]: https://github.com/stackabletech/operator-rs/pull/814
31+
[#817]: https://github.com/stackabletech/operator-rs/pull/817
32+
[#818]: https://github.com/stackabletech/operator-rs/pull/818
2833

2934
## [0.69.3] - 2024-06-12
3035

crates/stackable-operator/src/commons/product_image_selection.rs

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -105,17 +105,18 @@ impl ProductImage {
105105
let image_pull_policy = self.pull_policy.as_ref().to_string();
106106
let pull_secrets = self.pull_secrets.clone();
107107

108+
let product_version = self.product_version().to_owned();
109+
108110
match &self.image_selection {
109111
ProductImageSelection::Custom(image_selection) => {
110112
let image = ImageRef::parse(&image_selection.custom);
111113
let image_tag_or_hash = image.tag.or(image.hash).unwrap_or("latest".to_string());
112-
let mut app_version_label =
113-
format!("{}-{}", image_selection.product_version, image_tag_or_hash);
114+
let mut app_version_label = format!("{}-{}", product_version, image_tag_or_hash);
114115
// TODO Use new label mechanism once added
115116
app_version_label.truncate(63);
116117

117118
ResolvedProductImage {
118-
product_version: image_selection.product_version.to_string(),
119+
product_version,
119120
app_version_label,
120121
image: image_selection.custom.clone(),
121122
image_pull_policy,
@@ -145,14 +146,10 @@ impl ProductImage {
145146
};
146147
let image = format!(
147148
"{repo}/{image_base_name}:{product_version}-stackable{stackable_version}",
148-
product_version = image_selection.product_version,
149-
);
150-
let app_version_label = format!(
151-
"{product_version}-stackable{stackable_version}",
152-
product_version = image_selection.product_version,
153149
);
150+
let app_version_label = format!("{product_version}-stackable{stackable_version}",);
154151
ResolvedProductImage {
155-
product_version: image_selection.product_version.to_string(),
152+
product_version,
156153
app_version_label,
157154
image,
158155
image_pull_policy,
@@ -161,6 +158,22 @@ impl ProductImage {
161158
}
162159
}
163160
}
161+
162+
/// The product version is always known without having to resolve the image.
163+
/// In the future we might have a more clever version, which let's the operator pick a recommended product version
164+
/// automatically, e.g. from the LTS release line.
165+
pub fn product_version(&self) -> &str {
166+
match &self.image_selection {
167+
ProductImageSelection::Custom(ProductImageCustom {
168+
product_version: pv,
169+
..
170+
}) => pv,
171+
ProductImageSelection::StackableVersion(ProductImageStackableVersion {
172+
product_version: pv,
173+
..
174+
}) => pv,
175+
}
176+
}
164177
}
165178

166179
#[cfg(test)]

crates/stackable-telemetry/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,12 @@ All notable changes to this project will be documented in this file.
77
### Changed
88

99
- Add support for setting the environment variable for each configured tracing subscriber ([#801]).
10+
- Use OpenTelemetry Context in Axum instrumentation layer, adjust log and span level, simplify trace config ([#811]).
11+
- tracing: Upgrade opentelemetry crates, simplify trace config, fix shutdown conditions, use new way to shutdown LoggerProvider.
12+
- instrumentation/axum: demote event severity for errors easily caused by clients, replace parent span context if given in http header and link to previous trace contexts.
1013

1114
[#801]: https://github.com/stackabletech/operator-rs/pull/801
15+
[#811]: https://github.com/stackabletech/operator-rs/pull/811
1216

1317
## [0.1.0] - 2024-05-08
1418

crates/stackable-telemetry/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ opentelemetry = { workspace = true, features = ["logs"] }
1313
opentelemetry-appender-tracing.workspace = true
1414
opentelemetry-otlp = { workspace = true, features = ["gzip-tonic", "logs"] }
1515
opentelemetry-semantic-conventions.workspace = true
16-
opentelemetry_sdk = { workspace = true, features = ["logs", "rt-tokio"] }
16+
opentelemetry_sdk = { workspace = true, features = ["logs", "rt-tokio", "logs_level_enabled"] }
1717
pin-project.workspace = true
1818
snafu.workspace = true
1919
tokio.workspace = true

crates/stackable-telemetry/src/instrumentation/axum/mod.rs

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,14 @@ use axum::{
2121
response::Response,
2222
};
2323
use futures_util::ready;
24-
use opentelemetry::trace::SpanKind;
24+
use opentelemetry::{
25+
trace::{SpanKind, TraceContextExt},
26+
Context,
27+
};
2528
use pin_project::pin_project;
2629
use snafu::{ResultExt, Snafu};
2730
use tower::{Layer, Service};
28-
use tracing::{debug, field::Empty, instrument, trace_span, Span};
31+
use tracing::{field::Empty, Span};
2932
use tracing_opentelemetry::OpenTelemetrySpanExt;
3033

3134
mod extractor;
@@ -91,7 +94,6 @@ impl<S> Layer<S> for TraceLayer {
9194

9295
impl TraceLayer {
9396
/// Creates a new default trace layer.
94-
#[instrument(name = "create_trace_layer")]
9597
pub fn new() -> Self {
9698
Self::default()
9799
}
@@ -367,7 +369,7 @@ impl SpanExt for Span {
367369
let span_name = req.span_name();
368370
let url = req.uri();
369371

370-
debug!(
372+
tracing::trace!(
371373
http_method,
372374
span_name,
373375
?url,
@@ -396,8 +398,7 @@ impl SpanExt for Span {
396398
// Setting common fields first
397399
// See https://opentelemetry.io/docs/specs/semconv/http/http-spans/#common-attributes
398400

399-
debug!("create http span");
400-
let span = trace_span!(
401+
let span = tracing::trace_span!(
401402
"HTTP request",
402403
"otel.name" = span_name,
403404
"otel.kind" = ?SpanKind::Server,
@@ -418,9 +419,44 @@ impl SpanExt for Span {
418419
);
419420

420421
// Set the parent span based on the extracted context
421-
debug!("set parent span based on extracted context");
422-
let parent = HeaderExtractor::new(req.headers()).extract_context();
423-
span.set_parent(parent);
422+
//
423+
// The OpenTelemetry spec does not allow trace ids to be updated after
424+
// a span has been created. Since the (optional) new trace id given by
425+
// a client is only knowable after handling the request, it is not
426+
// available to the existing parent spans for the lower layers (tcp/tls
427+
// handling).
428+
//
429+
// Therefore, we have to made a decision about linking the two traces.
430+
// These are the options:
431+
// 1. Link to the trace id supplied in the incoming request, or
432+
// 2. Link to the current trace id, then set the parent contex based on
433+
// trace information supplied in the incoming request.
434+
//
435+
// Neither is ideal, as it means there are (at least) two traces to look
436+
// at to get a complete picture of what happened over the request.
437+
//
438+
// Option 1 is not viable, as the trace id in the response headers will
439+
// not be the same as what the client sent. Yet we are supposed to pass
440+
// their trace id in any further requests.
441+
//
442+
// We will go with option 2 as it at least keeps the higher layer spans
443+
// in one trace, which is likely going to be more useful to the person
444+
// visualising the traces.
445+
let new_parent = HeaderExtractor::new(req.headers()).extract_context();
446+
let new_span_context = new_parent.span().span_context().clone();
447+
let current_span_context = Context::current().span().span_context().clone();
448+
449+
if new_span_context != current_span_context {
450+
tracing::trace!(
451+
opentelemetry.trace_id.from = ?current_span_context.trace_id(),
452+
opentelemetry.trace_id.to = ?new_span_context.trace_id(),
453+
"set parent span context based on context extracted from request headers"
454+
);
455+
456+
Span::current().add_link(new_parent.span().span_context().clone());
457+
span.add_link(Context::current().span().span_context().to_owned());
458+
span.set_parent(new_parent);
459+
}
424460

425461
if let Some(user_agent) = req.user_agent() {
426462
span.record("user_agent.original", user_agent);

crates/stackable-telemetry/src/tracing.rs

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,9 @@
99
use opentelemetry::KeyValue;
1010
use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
1111
use opentelemetry_sdk::{
12-
logs,
12+
logs::{self, LoggerProvider},
1313
propagation::TraceContextPropagator,
14-
trace::{self, RandomIdGenerator, Sampler},
15-
Resource,
14+
trace, Resource,
1615
};
1716
use opentelemetry_semantic_conventions::resource;
1817
use snafu::{ResultExt as _, Snafu};
@@ -46,6 +45,8 @@ pub enum Error {
4645
///
4746
/// #[tokio::main]
4847
/// async fn main() -> Result<(), Error> {
48+
/// // IMPORTANT: Name the guard variable appropriately, do not just use
49+
/// // `let _ =`, as that will drop immediately.
4950
/// let _tracing_guard = Tracing::builder()
5051
/// .service_name("test")
5152
/// .with_console_output("TEST_CONSOLE", LevelFilter::INFO)
@@ -125,6 +126,7 @@ pub struct Tracing {
125126
console_log_config: SubscriberConfig,
126127
otlp_log_config: SubscriberConfig,
127128
otlp_trace_config: SubscriberConfig,
129+
logger_provider: Option<LoggerProvider>,
128130
}
129131

130132
impl Tracing {
@@ -134,7 +136,10 @@ impl Tracing {
134136

135137
/// Initialise the configured tracing subscribers, returning a guard that
136138
/// will shutdown the subscribers when dropped.
137-
pub fn init(self) -> Result<Tracing> {
139+
///
140+
/// IMPORTANT: Name the guard variable appropriately, do not just use
141+
/// `let _ =`, as that will drop immediately.
142+
pub fn init(mut self) -> Result<Tracing> {
138143
let mut layers: Vec<Box<dyn Layer<Registry> + Sync + Send>> = Vec::new();
139144

140145
if self.console_log_config.enabled {
@@ -168,10 +173,11 @@ impl Tracing {
168173

169174
// Convert `tracing::Event` to OpenTelemetry logs
170175
layers.push(
171-
OpenTelemetryTracingBridge::new(otel_log.provider())
176+
OpenTelemetryTracingBridge::new(&otel_log)
172177
.with_filter(env_filter_layer)
173178
.boxed(),
174179
);
180+
self.logger_provider = Some(otel_log);
175181
}
176182

177183
if self.otlp_trace_config.enabled {
@@ -186,15 +192,9 @@ impl Tracing {
186192
let otel_tracer = opentelemetry_otlp::new_pipeline()
187193
.tracing()
188194
.with_exporter(trace_exporter)
189-
.with_trace_config(
190-
trace::config()
191-
.with_sampler(Sampler::AlwaysOn) // TODO (@NickLarsenNZ): Make this configurable. See also Sampler::ParentBased
192-
.with_id_generator(RandomIdGenerator::default())
193-
.with_resource(Resource::new(vec![KeyValue::new(
194-
resource::SERVICE_NAME,
195-
self.service_name,
196-
)])),
197-
)
195+
.with_trace_config(trace::config().with_resource(Resource::new(vec![
196+
KeyValue::new(resource::SERVICE_NAME, self.service_name),
197+
])))
198198
.install_batch(opentelemetry_sdk::runtime::Tokio)
199199
.context(InstallOtelTraceExporterSnafu)?;
200200

@@ -223,16 +223,31 @@ impl Tracing {
223223
.context(SetGlobalDefaultSubscriberSnafu)?;
224224
}
225225

226+
// IMPORTANT: we must return self, otherwise Drop will be called and uninitialise tracing
226227
Ok(self)
227228
}
228229
}
229230

230231
impl Drop for Tracing {
231232
fn drop(&mut self) {
232-
// NOTE (@NickLarsenNZ): These might eventually be replaced with something like SdkMeterProvider::shutdown(&self)
233-
// see: https://github.com/open-telemetry/opentelemetry-rust/pull/1412/files#r1409608679
234-
opentelemetry::global::shutdown_tracer_provider();
235-
opentelemetry::global::shutdown_logger_provider();
233+
tracing::debug!(
234+
opentelemetry.tracing.enabled = self.otlp_trace_config.enabled,
235+
opentelemetry.logger.enabled = self.otlp_log_config.enabled,
236+
"shutting down opentelemetry OTLP providers"
237+
);
238+
239+
if self.otlp_trace_config.enabled {
240+
// NOTE (@NickLarsenNZ): This might eventually be replaced with something like SdkMeterProvider::shutdown(&self)
241+
// as has been done with the LoggerProvider (further below)
242+
// see: https://github.com/open-telemetry/opentelemetry-rust/pull/1412/files#r1409608679
243+
opentelemetry::global::shutdown_tracer_provider();
244+
}
245+
246+
if let Some(logger_provider) = &self.logger_provider {
247+
if let Err(error) = logger_provider.shutdown() {
248+
tracing::error!(%error, "unable to shutdown LoggerProvider");
249+
}
250+
}
236251
}
237252
}
238253

@@ -417,6 +432,7 @@ impl TracingBuilder<builder_state::Config> {
417432
console_log_config: self.console_log_config,
418433
otlp_log_config: self.otlp_log_config,
419434
otlp_trace_config: self.otlp_trace_config,
435+
logger_provider: None,
420436
}
421437
}
422438
}

crates/stackable-webhook/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,21 @@ All notable changes to this project will be documented in this file.
44

55
## [Unreleased]
66

7+
## Changed
8+
9+
- Remove instrumentation of long running functions, add more granular instrumentation of futures. Adjust span and event levels ([#811]).
10+
711
### Fixed
812

913
- Fix the extraction of `ConnectInfo` (data about the connection client) and
1014
the `Host` info (data about the server) in the `AxumTraceLayer`. This was
1115
previously not extracted correctly and thus not included in the OpenTelemetry
1216
compatible traces ([#806]).
17+
- Spawn blocking code on a blocking thread ([#815]).
1318

1419
[#806]: https://github.com/stackabletech/operator-rs/pull/806
20+
[#811]: https://github.com/stackabletech/operator-rs/pull/811
21+
[#815]: https://github.com/stackabletech/operator-rs/pull/815
1522

1623
## [0.3.0] - 2024-05-08
1724

crates/stackable-webhook/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ hyper-util.workspace = true
1717
hyper.workspace = true
1818
k8s-openapi.workspace = true
1919
kube.workspace = true
20+
opentelemetry.workspace = true
2021
serde_json.workspace = true
2122
snafu.workspace = true
2223
tokio-rustls.workspace = true
2324
tokio.workspace = true
2425
tower-http.workspace = true
2526
tower.workspace = true
2627
tracing.workspace = true
28+
tracing-opentelemetry.workspace = true

0 commit comments

Comments
 (0)