Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion core/src/telemetry/otel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use super::{
};
use crate::{abstractions::dbg_panic, telemetry::metrics::DEFAULT_S_BUCKETS};
use opentelemetry::{
self,
self, global,
metrics::{Meter, MeterProvider as MeterProviderT},
Key, KeyValue, Value,
};
Expand Down Expand Up @@ -121,6 +121,9 @@ pub(super) fn augment_meter_provider_with_defaults(
pub fn build_otlp_metric_exporter(
opts: OtelCollectorOptions,
) -> Result<CoreOtelMeter, anyhow::Error> {
global::set_error_handler(|err| {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note this was removed recently in 0.27 (ref open-telemetry/opentelemetry-rust#2260), so we either need to say in the dep that we are 0.26 (not sure if you can say < 0.27) or look at that migration guide and see if just upgrading to 0.27 is enough to get proper logs here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the change only affects how you can override their default logging behavior; we would still need to override it, just in a different way now. I'm not sure it's worth upgrading to 0.27 over this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think my concern is whether other people want to upgrade to 0.27+. I am not sure what Cargo's auto-upgrade logic is for zero-ver. Obviously one day we will need to as well even if that day is not today (can't stay on 0.26 forever).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Who are the "other" people mentioned here? customers?

> I am not sure what Cargo's auto-upgrade logic is for zero-ver.

I assume cargo wouldn't auto-upgrade from 0.26 to 0.27, since there are a number of breaking changes with that update.

> Obviously one day we will need to as well even if that day is not today (can't stay on 0.26 forever).

Agreed, I'm just reluctant to upgrade today 😅 but now that I've written a test, that future upgrade should be easier

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Who are the "other" people mentioned here? customers?

Nope just us internally

> Agreed, I'm just reluctant to upgrade today

Works for me

tracing::error!("{}", err);
})?;
Comment on lines +124 to +126
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if there is a better, more global place to set this instead of on each build_otlp_metric_exporter call. I'm unsure whether we have global metric initialization code.

let exporter = match opts.protocol {
OtlpProtocol::Grpc => {
let mut exporter = opentelemetry_otlp::TonicExporterBuilder::default()
Expand Down
77 changes: 75 additions & 2 deletions tests/integ_tests/metrics_tests.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
use anyhow::anyhow;
use assert_matches::assert_matches;
use std::string::ToString;
use std::{collections::HashMap, env, net::SocketAddr, sync::Arc, time::Duration};
use std::{
collections::HashMap,
env,
net::SocketAddr,
string::ToString,
sync::{Arc, Mutex},
time::Duration,
};
use temporal_client::{
WorkflowClientTrait, WorkflowOptions, WorkflowService, REQUEST_LATENCY_HISTOGRAM_NAME,
};
Expand Down Expand Up @@ -48,6 +54,7 @@ use temporal_sdk_core_test_utils::{
PROMETHEUS_QUERY_API,
};
use tokio::{join, sync::Barrier, task::AbortHandle};
use tracing_subscriber::fmt::MakeWriter;
use url::Url;

static ANY_PORT: &str = "127.0.0.1:0";
Expand Down Expand Up @@ -900,3 +907,69 @@ async fn evict_on_complete_does_not_count_as_forced_eviction() {
// Metric shouldn't show up at all, since it's zero the whole time.
assert!(!body.contains("temporal_sticky_cache_total_forced_eviction"));
}

/// Writer factory for a tracing subscriber that collects log output in memory.
///
/// Every handle it produces appends to the same shared byte buffer, so a test
/// can keep a clone of `buf` and inspect what was logged.
struct CapturingWriter {
    /// Shared, mutex-protected buffer that receives all written bytes.
    buf: Arc<Mutex<Vec<u8>>>,
}

/// Allows a `CapturingWriter` to be passed to `with_writer` on the `fmt` subscriber builder.
impl MakeWriter<'_> for CapturingWriter {
    type Writer = CapturingHandle;

    /// Produces a new handle that shares this writer's buffer.
    fn make_writer(&self) -> Self::Writer {
        let shared = Arc::clone(&self.buf);
        CapturingHandle(shared)
    }
}

/// Write handle that appends everything it receives to a shared byte buffer.
struct CapturingHandle(Arc<Mutex<Vec<u8>>>);

impl std::io::Write for CapturingHandle {
    /// Appends all of `buf` to the shared buffer and reports it as fully written.
    ///
    /// Panics if the buffer's mutex has been poisoned.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let written = buf.len();
        // The guard is a temporary, so the lock is released at the end of this statement.
        self.0.lock().unwrap().extend_from_slice(buf);
        Ok(written)
    }

    /// Nothing is staged locally, so flushing always succeeds without doing any work.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

/// Checks that a failing OTLP metric export shows up in the captured `tracing`
/// output as an ERROR-level line containing the exporter failure message.
#[tokio::test]
async fn otel_errors_logged_as_errors() {
    // Install a default subscriber whose output lands in an in-memory buffer.
    let captured: Arc<Mutex<Vec<u8>>> = Arc::new(Mutex::new(Vec::new()));
    let subscriber = tracing_subscriber::fmt()
        .with_writer(CapturingWriter {
            buf: captured.clone(),
        })
        .finish();
    // The guard must stay bound for the whole test so the subscriber remains the default.
    let _guard = tracing::subscriber::set_default(subscriber);

    // Build an exporter pointed at a deliberately invalid endpoint.
    let collector_opts = OtelCollectorOptionsBuilder::default()
        .url("https://localhostt:9995/v1/metrics".parse().unwrap()) // Invalid endpoint
        .build()
        .unwrap();
    let exporter = build_otlp_metric_exporter(collector_opts).unwrap();
    let meter = Arc::new(exporter) as Arc<dyn CoreMeter>;

    let telemetry_opts = TelemetryOptionsBuilder::default()
        .metrics(meter)
        .build()
        .unwrap();

    let runtime = CoreRuntime::new_assume_tokio(telemetry_opts).unwrap();
    let mut starter = CoreWfStarter::new_with_runtime("otel_errors_logged_as_errors", runtime);
    // Bound to a name so the worker is not dropped before the assertions run.
    let _worker = starter.get_worker().await;

    // Wait to allow exporter to attempt sending metrics and fail.
    tokio::time::sleep(Duration::from_secs(2)).await;

    // The lock is taken only after the last `.await`, so the guard is never held across one.
    let raw = captured.lock().unwrap();
    let log_str = String::from_utf8_lossy(&raw);

    assert!(
        log_str.contains("ERROR"),
        "Expected ERROR log not found in logs: {}",
        log_str
    );
    assert!(
        log_str.contains("Metrics exporter otlp failed with the grpc server returns error"),
        "Expected an OTel exporter error message in logs: {}",
        log_str
    );
}
Loading