
Commit 15d69b1

Improve shutdown handling in PeriodicReader (#2422)
1 parent 9b0ccce commit 15d69b1

1 file changed


opentelemetry-sdk/src/metrics/periodic_reader.rs

Lines changed: 42 additions & 16 deletions
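
The core of the change: the reader's is_shutdown flag, which was only stored after the shutdown round trip completed, becomes a shutdown_invoked flag that shutdown() flips up front with AtomicBool::swap. Only the first caller ever observes false and proceeds, even when several threads race, and force_flush() now rejects calls made once shutdown has been invoked. A minimal standalone sketch of the swap-based guard (simplified types, not the SDK's own):

    use std::sync::atomic::{AtomicBool, Ordering};

    struct Reader {
        shutdown_invoked: AtomicBool,
    }

    impl Reader {
        fn shutdown(&self) -> Result<(), String> {
            // swap returns the previous value: only the first caller sees
            // `false` and continues; later or concurrent callers get an error.
            if self.shutdown_invoked.swap(true, Ordering::Relaxed) {
                return Err("shutdown already invoked".into());
            }
            // ... signal the background thread and wait for the final export ...
            Ok(())
        }
    }

    fn main() {
        let reader = Reader { shutdown_invoked: AtomicBool::new(false) };
        assert!(reader.shutdown().is_ok());
        assert!(reader.shutdown().is_err()); // second invocation is rejected
    }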
@@ -158,7 +158,7 @@ impl PeriodicReader {
         let reader = PeriodicReader {
             inner: Arc::new(PeriodicReaderInner {
                 message_sender: Arc::new(message_sender),
-                is_shutdown: AtomicBool::new(false),
+                shutdown_invoked: AtomicBool::new(false),
                 producer: Mutex::new(None),
                 exporter: Arc::new(exporter),
             }),
@@ -300,7 +300,7 @@ struct PeriodicReaderInner {
     exporter: Arc<dyn PushMetricExporter>,
     message_sender: Arc<mpsc::Sender<Message>>,
     producer: Mutex<Option<Weak<dyn SdkProducer>>>,
-    is_shutdown: AtomicBool,
+    shutdown_invoked: AtomicBool,
 }
 
 impl PeriodicReaderInner {
@@ -314,10 +314,6 @@ impl PeriodicReaderInner {
     }
 
     fn collect(&self, rm: &mut ResourceMetrics) -> MetricResult<()> {
-        if self.is_shutdown.load(std::sync::atomic::Ordering::Relaxed) {
-            return Err(MetricError::Other("reader is shut down".into()));
-        }
-
         let producer = self.producer.lock().expect("lock poisoned");
         if let Some(p) = producer.as_ref() {
             p.upgrade()
@@ -378,9 +374,28 @@ impl PeriodicReaderInner {
     }
 
     fn force_flush(&self) -> MetricResult<()> {
-        if self.is_shutdown.load(std::sync::atomic::Ordering::Relaxed) {
-            return Err(MetricError::Other("reader is shut down".into()));
+        if self
+            .shutdown_invoked
+            .load(std::sync::atomic::Ordering::Relaxed)
+        {
+            return Err(MetricError::Other(
+                "Cannot perform flush as PeriodicReader shutdown already invoked.".into(),
+            ));
         }
+
+        // TODO: Better message for this scenario.
+        // Flush and Shutdown called from 2 threads: Flush checks the shutdown
+        // flag before the shutdown thread sets it. Both threads attempt to send
+        // a message to the same channel. Case 1: Flush thread sends its message
+        // first, shutdown thread sends next. Flush would succeed, as the
+        // background thread won't process the shutdown message until the
+        // flush-triggered export is done. Case 2: Shutdown thread sends first,
+        // flush thread sends next. Shutdown would succeed, as the background
+        // thread processes the shutdown message first and then exits, so it
+        // won't receive the flush message. ForceFlush returns Failure, but we
+        // could indicate specifically that shutdown has completed. TODO is to
+        // see if this message can be improved.
+
         let (response_tx, response_rx) = mpsc::channel();
         self.message_sender
             .send(Message::Flush(response_tx))
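
The TODO added in force_flush above documents a race the commit tolerates rather than removes: when flush and shutdown run on two threads, whichever message reaches the reader's single channel first wins, and a Flush enqueued after Shutdown is never answered because the background thread has already exited. A standalone sketch of that ordering effect, using a simplified stand-in for the reader's Message enum and its response channels:

    use std::sync::mpsc;
    use std::thread;

    // Hypothetical stand-in for the reader's channel protocol.
    enum Message {
        Flush(mpsc::Sender<bool>),
        Shutdown(mpsc::Sender<bool>),
    }

    fn main() {
        let (tx, rx) = mpsc::channel::<Message>();

        // Background worker: handles messages in arrival order, exits on Shutdown.
        let worker = thread::spawn(move || {
            while let Ok(msg) = rx.recv() {
                match msg {
                    Message::Flush(resp) => {
                        let _ = resp.send(true);
                    }
                    Message::Shutdown(resp) => {
                        let _ = resp.send(true);
                        break;
                    }
                }
            }
        });

        // Case 2 from the comment: Shutdown is enqueued first ...
        let (shutdown_tx, shutdown_rx) = mpsc::channel();
        tx.send(Message::Shutdown(shutdown_tx)).unwrap();

        // ... so a Flush enqueued afterwards is never processed; its response
        // sender is dropped and the flush caller sees an error, not a reply.
        let (flush_tx, flush_rx) = mpsc::channel();
        let _ = tx.send(Message::Flush(flush_tx));

        assert_eq!(shutdown_rx.recv(), Ok(true));
        worker.join().unwrap();
        assert!(flush_rx.recv().is_err());
    }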
@@ -399,8 +414,13 @@ impl PeriodicReaderInner {
     }
 
     fn shutdown(&self) -> MetricResult<()> {
-        if self.is_shutdown.load(std::sync::atomic::Ordering::Relaxed) {
-            return Err(MetricError::Other("Reader is already shut down".into()));
+        if self
+            .shutdown_invoked
+            .swap(true, std::sync::atomic::Ordering::Relaxed)
+        {
+            return Err(MetricError::Other(
+                "PeriodicReader shutdown already invoked.".into(),
+            ));
         }
 
         // TODO: See if this is better to be created upfront.
@@ -410,16 +430,12 @@ impl PeriodicReaderInner {
             .map_err(|e| MetricError::Other(e.to_string()))?;
 
         if let Ok(response) = response_rx.recv() {
-            self.is_shutdown
-                .store(true, std::sync::atomic::Ordering::Relaxed);
             if response {
                 Ok(())
             } else {
                 Err(MetricError::Other("Failed to shutdown".into()))
             }
         } else {
-            self.is_shutdown
-                .store(true, std::sync::atomic::Ordering::Relaxed);
             Err(MetricError::Other("Failed to shutdown".into()))
         }
     }
@@ -697,27 +713,31 @@ mod tests {
         collection_triggered_by_interval_helper();
         collection_triggered_by_flush_helper();
         collection_triggered_by_shutdown_helper();
+        collection_triggered_by_drop_helper();
     }
 
     #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
     async fn collection_from_tokio_multi_with_one_worker() {
         collection_triggered_by_interval_helper();
         collection_triggered_by_flush_helper();
         collection_triggered_by_shutdown_helper();
+        collection_triggered_by_drop_helper();
     }
 
     #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
     async fn collection_from_tokio_with_two_worker() {
         collection_triggered_by_interval_helper();
         collection_triggered_by_flush_helper();
         collection_triggered_by_shutdown_helper();
+        collection_triggered_by_drop_helper();
     }
 
     #[tokio::test(flavor = "current_thread")]
     async fn collection_from_tokio_current() {
         collection_triggered_by_interval_helper();
         collection_triggered_by_flush_helper();
         collection_triggered_by_shutdown_helper();
+        collection_triggered_by_drop_helper();
     }
 
     fn collection_triggered_by_interval_helper() {
@@ -742,7 +762,13 @@ mod tests {
         });
     }
 
-    fn collection_helper(trigger: fn(&SdkMeterProvider)) {
+    fn collection_triggered_by_drop_helper() {
+        collection_helper(|meter_provider| {
+            drop(meter_provider);
+        });
+    }
+
+    fn collection_helper(trigger: fn(SdkMeterProvider)) {
         // Arrange
         let interval = std::time::Duration::from_millis(10);
         let exporter = InMemoryMetricExporter::default();
@@ -762,7 +788,7 @@ mod tests {
             .build();
 
         // Act
-        trigger(&meter_provider);
+        trigger(meter_provider);
 
         // Assert
         receiver
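
The new collection_triggered_by_drop_helper covers the remaining trigger: simply dropping the provider. That is also why collection_helper now takes the provider by value (fn(SdkMeterProvider) instead of fn(&SdkMeterProvider)), so the trigger closure can consume and drop it. The behaviour rests on the usual Rust pattern of shutting down from Drop; a minimal standalone illustration of that pattern, not the SDK's actual Drop implementation:

    struct Provider;

    impl Provider {
        fn shutdown(&self) {
            // In the SDK, this is where any remaining metrics get exported.
            println!("final export");
        }
    }

    impl Drop for Provider {
        fn drop(&mut self) {
            // Going out of scope (or an explicit drop) triggers shutdown,
            // which is exactly what the drop-based test exercises.
            self.shutdown();
        }
    }

    fn main() {
        let provider = Provider;
        drop(provider); // "final export" runs here without an explicit shutdown() call
    }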
