Skip to content

Issue using async_io Timer in both 1.83 and 1.85 #258

@npmenard

Description

@npmenard

Hi esp-rs team, I have been running into issues while using async_io::Timer in ESP32 projects with both rust 1.83 and rust 1.85. With rust 1.83 the program would ultimately run into a deadlock. The program hangs in concurrent-queue while trying to update atomic variables.

With rust 1.85 I have similar behavior with the addition of running into LoadProhibited or DoubleException errors.

Given that the behavior differs between 1.83 and 1.85, and that the deadlock relates to atomics I am filling this bug here let me know if this is the correct location and/or if you need more data.

This is the code I tried (also available at https://github.com/npmenard/cqueue-deadlock) in both case I am using the esp-idf-template against esp-idf 5.2.3

use std::{future::Future, task::Poll, time::Duration};

use async_io::block_on;
use esp_idf_svc::{
    eventloop::EspSystemEventLoop,
    hal::prelude::Peripherals,
    nvs::EspDefaultNvsPartition,
    wifi::{BlockingWifi, ClientConfiguration, Configuration, EspWifi},
};
use futures_lite::StreamExt;

const SSID: &str = env!("WIFI_SSID");
const PASSWORD: &str = env!("WIFI_PASS");

struct FutureExpample {
    large: [u32; 200],
}

impl Future for FutureExpample {
    type Output = u32;
    fn poll(
        self: std::pin::Pin<&mut Self>,
        _: &mut std::task::Context<'_>,
    ) -> std::task::Poll<Self::Output> {
        let will_complete = unsafe { esp_idf_svc::sys::esp_random() } % 2;
        if will_complete == 1 {
            return Poll::Ready(
                self.large[unsafe { esp_idf_svc::sys::esp_random() % (self.large.len() as u32 - 1) }
                    as usize],
            );
        }
        Poll::Pending
    }
}

async fn runner() {
    use async_io::Timer;
    use futures_util::stream::FuturesUnordered;

    loop {
        let mut futures = FuturesUnordered::new();
        for _ in 0..100 {
            futures.push(async {
                let fut = FutureExpample {
                    large: [0xDEADBEFF_u32; 200],
                };
                futures_lite::future::or(fut, async {
                    let _ = Timer::after(Duration::from_millis(unsafe {
                        esp_idf_svc::sys::esp_random() % 1000
                    } as u64))
                    .await;
                    0
                })
                .await
            });
        }
        while let Some(_) = futures.next().await {}
        log::info!("next loop");
    }
}

fn main() {
    esp_idf_svc::sys::link_patches();
    esp_idf_svc::log::EspLogger::initialize_default();
    esp_idf_svc::sys::esp!(unsafe {
        esp_idf_svc::sys::esp_vfs_eventfd_register(&esp_idf_svc::sys::esp_vfs_eventfd_config_t {
            max_fds: 5,
        })
    })
    .unwrap();

    let peripherals = Peripherals::take().unwrap();
    let sys_loop = EspSystemEventLoop::take().unwrap();
    let nvs = EspDefaultNvsPartition::take().unwrap();

    let mut wifi = BlockingWifi::wrap(
        EspWifi::new(peripherals.modem, sys_loop.clone(), Some(nvs)).unwrap(),
        sys_loop,
    )
    .unwrap();

    start_wifi(&mut wifi);

    let ip_info = wifi.wifi().sta_netif().get_ip_info().unwrap();

    let exec = async_executor::LocalExecutor::new();

    block_on(exec.run(runner()));
}

fn start_wifi(wifi: &mut BlockingWifi<EspWifi<'static>>) {
    let wifi_configuration: Configuration = Configuration::Client(ClientConfiguration {
        ssid: SSID.try_into().unwrap(),
        bssid: None,
        auth_method: esp_idf_svc::wifi::AuthMethod::WPA2Personal,
        password: PASSWORD.try_into().unwrap(),
        channel: None,
        ..Default::default()
    });

    wifi.set_configuration(&wifi_configuration).unwrap();
    wifi.start().unwrap();
    wifi.connect().unwrap();
    wifi.wait_netif_up().unwrap();
}

The code Loop should run indefinitely (works on x86)

Bug

Deadlock example with 1.83

After a few minutes the WDT will trigger because of a deadlock in concurrent-queue
pthread task and main task race to do a push/pop to the queue but neither is able to make progress

I (112643) cqueue_deadlock: next loop
E (122641) task_wdt: Task watchdog got triggered. The following tasks/users did not reset the watchdog in time:
E (122641) task_wdt:  - IDLE0 (CPU 0)
E (122641) task_wdt: Tasks currently running:
E (122641) task_wdt: CPU 0: pthread
E (122641) task_wdt: CPU 1: IDLE1
E (122641) task_wdt: Aborting.
E (122641) task_wdt: Print CPU 0 (current core) backtrace




Backtrace: 0x400df26f:0x3ffe7790 0x400de03d:0x3ffe77e0 0x400dce05:0x3ffe7800 0x400dcb14:0x3ffe7860 0x400dceaa:0x3ffe7950 0x400ddbae:0x3ffe7a60 0x400df55c:0x3ffe7b40 0x400dd80b:0x3ffe7b60 0x400f8b33:0x3ffe7bb0 0x400f89b4:0x3ffe7bd0 0x4010e95c:0x3ffe7bf0
0x400df26f - concurrent_queue::bounded::Bounded<T>::pop
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/concurrent-queue-2.5.0/src/bounded.rs:228
0x400de03d - concurrent_queue::ConcurrentQueue<T>::pop
    at ??:??
0x400dce05 - <concurrent_queue::TryIter<T> as core::iter::traits::iterator::Iterator>::next
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/concurrent-queue-2.5.0/src/lib.rs:494
0x400dcb14 - async_io::reactor::Reactor::process_timers
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/reactor.rs:211
0x400dceaa - async_io::reactor::ReactorLock::react
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/reactor.rs:280
0x400ddbae - async_io::driver::main_loop
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/driver.rs:69
0x400df55c - async_io::driver::unparker::{{closure}}::{{closure}}
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/driver.rs:33
0x400dd80b - std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}}
    at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/std/src/thread/mod.rs:538
0x400f8b33 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once
    at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/alloc/src/boxed.rs:2454
0x400f89b4 - std::sys::pal::unix::thread::Thread::new::thread_start
    at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/std/src/sys/pal/unix/thread.rs:105
0x4010e95c - pthread_task_func
    at /opt/esp/esp-idf/components/pthread/pthread.c:196




ELF file SHA256: 000000000

CPU halted.

Bug with 1.85

After a few minutes the program will crash with double exception of load prohibited (see below) or deadlock as well

I (16759) cqueue_deadlock: next loop
Guru Meditation Error: Core  0 panic'ed (Double exception).

Core  0 register dump:
PC      : 0x40095722  PS      : 0x00040c36  A0      : 0x00000027  A1      : 0x3ffe77c0
A2      : 0x3ffb3dc0  A3      : 0x00000000  A4      : 0x00000000  A5      : 0x00000000
0x3ffb3dc0 - async_io::reactor::Reactor::get::REACTOR
    at ??:??
A6      : 0x0000c350  A7      : 0x3ffdd090  A8      : 0x800de157  A9      : 0x3ffe7780
A10     : 0x00000002  A11     : 0x3ffcc7ac  A12     : 0x00000000  A13     : 0x3ffcc7b8
A14     : 0x0000c350  A15     : 0x3ffcc7b4  SAR     : 0x0000000c  EXCCAUSE: 0x00000002
EXCVADDR: 0x400840c2  LBEG    : 0x40083b65  LEND    : 0x40083b6d  LCOUNT  : 0x00000027
0x40083b65 - esp_timer_impl_get_counter_reg
    at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:118
0x40083b6d - esp_timer_impl_get_counter_reg
    at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:128


Backtrace: 0x4009571f:0x3ffe77c0 0x00000024:0x400840f2 |<-CORRUPTED




ELF file SHA256: 000000000

CPU halted.
Guru Meditation Error: Core  0 panic'ed (LoadProhibited). Exception was unhandled.

Core  0 register dump:
PC      : 0x400df89b  PS      : 0x00060c30  A0      : 0x00060423  A1      : 0x3ffbd8e0
0x400df89b - core::task::wake::Context::waker
    at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/core/src/task/wake.rs:250
A2      : 0x3ffbfdb4  A3      : 0x00000004  A4      : 0x00060820  A5      : 0x00000000
A6      : 0x00000000  A7      : 0x00000000  A8      : 0x00060820  A9      : 0x3ffbd8c0
A10     : 0x3ffb3dc0  A11     : 0xffffffff  A12     : 0x0000004c  A13     : 0x00000000
0x3ffb3dc0 - async_io::reactor::Reactor::get::REACTOR
    at ??:??
A14     : 0x0000004c  A15     : 0x00000002  SAR     : 0x0000000c  EXCCAUSE: 0x0000001c
EXCVADDR: 0x00060820  LBEG    : 0x40083b65  LEND    : 0x40083b6d  LCOUNT  : 0x00000027
0x40083b65 - esp_timer_impl_get_counter_reg
    at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:118
0x40083b6d - esp_timer_impl_get_counter_reg
    at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:128


Backtrace: 0x400df898:0x3ffbd8e0 0x00060420:0x3ffbd910 |<-CORRUPTED
0x400df898 - async_lock::once_cell::OnceCell<T>::get_or_init_blocking
    at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-1949cf8c6b5b557f/async-lock-3.4.0/src/once_cell.rs:516

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions