-
Notifications
You must be signed in to change notification settings - Fork 38
Description
Hi esp-rs team, I have been running into issues while using async_io::Timer
in ESP32 projects with both rust 1.83 and rust 1.85. With rust 1.83 the program would ultimately run into a deadlock. The program hangs in concurrent-queue
while trying to update atomic variables.
With rust 1.85 I have similar behavior with the addition of running into LoadProhibited or DoubleException errors.
Given that the behavior differs between 1.83 and 1.85, and that the deadlock relates to atomics I am filling this bug here let me know if this is the correct location and/or if you need more data.
This is the code I tried (also available at https://github.com/npmenard/cqueue-deadlock) in both case I am using the esp-idf-template against esp-idf 5.2.3
use std::{future::Future, task::Poll, time::Duration};
use async_io::block_on;
use esp_idf_svc::{
eventloop::EspSystemEventLoop,
hal::prelude::Peripherals,
nvs::EspDefaultNvsPartition,
wifi::{BlockingWifi, ClientConfiguration, Configuration, EspWifi},
};
use futures_lite::StreamExt;
const SSID: &str = env!("WIFI_SSID");
const PASSWORD: &str = env!("WIFI_PASS");
struct FutureExpample {
large: [u32; 200],
}
impl Future for FutureExpample {
type Output = u32;
fn poll(
self: std::pin::Pin<&mut Self>,
_: &mut std::task::Context<'_>,
) -> std::task::Poll<Self::Output> {
let will_complete = unsafe { esp_idf_svc::sys::esp_random() } % 2;
if will_complete == 1 {
return Poll::Ready(
self.large[unsafe { esp_idf_svc::sys::esp_random() % (self.large.len() as u32 - 1) }
as usize],
);
}
Poll::Pending
}
}
async fn runner() {
use async_io::Timer;
use futures_util::stream::FuturesUnordered;
loop {
let mut futures = FuturesUnordered::new();
for _ in 0..100 {
futures.push(async {
let fut = FutureExpample {
large: [0xDEADBEFF_u32; 200],
};
futures_lite::future::or(fut, async {
let _ = Timer::after(Duration::from_millis(unsafe {
esp_idf_svc::sys::esp_random() % 1000
} as u64))
.await;
0
})
.await
});
}
while let Some(_) = futures.next().await {}
log::info!("next loop");
}
}
fn main() {
esp_idf_svc::sys::link_patches();
esp_idf_svc::log::EspLogger::initialize_default();
esp_idf_svc::sys::esp!(unsafe {
esp_idf_svc::sys::esp_vfs_eventfd_register(&esp_idf_svc::sys::esp_vfs_eventfd_config_t {
max_fds: 5,
})
})
.unwrap();
let peripherals = Peripherals::take().unwrap();
let sys_loop = EspSystemEventLoop::take().unwrap();
let nvs = EspDefaultNvsPartition::take().unwrap();
let mut wifi = BlockingWifi::wrap(
EspWifi::new(peripherals.modem, sys_loop.clone(), Some(nvs)).unwrap(),
sys_loop,
)
.unwrap();
start_wifi(&mut wifi);
let ip_info = wifi.wifi().sta_netif().get_ip_info().unwrap();
let exec = async_executor::LocalExecutor::new();
block_on(exec.run(runner()));
}
fn start_wifi(wifi: &mut BlockingWifi<EspWifi<'static>>) {
let wifi_configuration: Configuration = Configuration::Client(ClientConfiguration {
ssid: SSID.try_into().unwrap(),
bssid: None,
auth_method: esp_idf_svc::wifi::AuthMethod::WPA2Personal,
password: PASSWORD.try_into().unwrap(),
channel: None,
..Default::default()
});
wifi.set_configuration(&wifi_configuration).unwrap();
wifi.start().unwrap();
wifi.connect().unwrap();
wifi.wait_netif_up().unwrap();
}
The code Loop should run indefinitely (works on x86)
Bug
Deadlock example with 1.83
After a few minutes the WDT will trigger because of a deadlock in concurrent-queue
pthread task and main task race to do a push/pop to the queue but neither is able to make progress
I (112643) cqueue_deadlock: next loop
E (122641) task_wdt: Task watchdog got triggered. The following tasks/users did not reset the watchdog in time:
E (122641) task_wdt: - IDLE0 (CPU 0)
E (122641) task_wdt: Tasks currently running:
E (122641) task_wdt: CPU 0: pthread
E (122641) task_wdt: CPU 1: IDLE1
E (122641) task_wdt: Aborting.
E (122641) task_wdt: Print CPU 0 (current core) backtrace
Backtrace: 0x400df26f:0x3ffe7790 0x400de03d:0x3ffe77e0 0x400dce05:0x3ffe7800 0x400dcb14:0x3ffe7860 0x400dceaa:0x3ffe7950 0x400ddbae:0x3ffe7a60 0x400df55c:0x3ffe7b40 0x400dd80b:0x3ffe7b60 0x400f8b33:0x3ffe7bb0 0x400f89b4:0x3ffe7bd0 0x4010e95c:0x3ffe7bf0
0x400df26f - concurrent_queue::bounded::Bounded<T>::pop
at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/concurrent-queue-2.5.0/src/bounded.rs:228
0x400de03d - concurrent_queue::ConcurrentQueue<T>::pop
at ??:??
0x400dce05 - <concurrent_queue::TryIter<T> as core::iter::traits::iterator::Iterator>::next
at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/concurrent-queue-2.5.0/src/lib.rs:494
0x400dcb14 - async_io::reactor::Reactor::process_timers
at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/reactor.rs:211
0x400dceaa - async_io::reactor::ReactorLock::react
at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/reactor.rs:280
0x400ddbae - async_io::driver::main_loop
at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/driver.rs:69
0x400df55c - async_io::driver::unparker::{{closure}}::{{closure}}
at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-6f17d22bba15001f/async-io-2.4.0/src/driver.rs:33
0x400dd80b - std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}}
at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/std/src/thread/mod.rs:538
0x400f8b33 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once
at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/alloc/src/boxed.rs:2454
0x400f89b4 - std::sys::pal::unix::thread::Thread::new::thread_start
at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/std/src/sys/pal/unix/thread.rs:105
0x4010e95c - pthread_task_func
at /opt/esp/esp-idf/components/pthread/pthread.c:196
ELF file SHA256: 000000000
CPU halted.
Bug with 1.85
After a few minutes the program will crash with double exception of load prohibited (see below) or deadlock as well
I (16759) cqueue_deadlock: next loop
Guru Meditation Error: Core 0 panic'ed (Double exception).
Core 0 register dump:
PC : 0x40095722 PS : 0x00040c36 A0 : 0x00000027 A1 : 0x3ffe77c0
A2 : 0x3ffb3dc0 A3 : 0x00000000 A4 : 0x00000000 A5 : 0x00000000
0x3ffb3dc0 - async_io::reactor::Reactor::get::REACTOR
at ??:??
A6 : 0x0000c350 A7 : 0x3ffdd090 A8 : 0x800de157 A9 : 0x3ffe7780
A10 : 0x00000002 A11 : 0x3ffcc7ac A12 : 0x00000000 A13 : 0x3ffcc7b8
A14 : 0x0000c350 A15 : 0x3ffcc7b4 SAR : 0x0000000c EXCCAUSE: 0x00000002
EXCVADDR: 0x400840c2 LBEG : 0x40083b65 LEND : 0x40083b6d LCOUNT : 0x00000027
0x40083b65 - esp_timer_impl_get_counter_reg
at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:118
0x40083b6d - esp_timer_impl_get_counter_reg
at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:128
Backtrace: 0x4009571f:0x3ffe77c0 0x00000024:0x400840f2 |<-CORRUPTED
ELF file SHA256: 000000000
CPU halted.
Guru Meditation Error: Core 0 panic'ed (LoadProhibited). Exception was unhandled.
Core 0 register dump:
PC : 0x400df89b PS : 0x00060c30 A0 : 0x00060423 A1 : 0x3ffbd8e0
0x400df89b - core::task::wake::Context::waker
at /opt/rust/rustup/toolchains/esp/lib/rustlib/src/rust/library/core/src/task/wake.rs:250
A2 : 0x3ffbfdb4 A3 : 0x00000004 A4 : 0x00060820 A5 : 0x00000000
A6 : 0x00000000 A7 : 0x00000000 A8 : 0x00060820 A9 : 0x3ffbd8c0
A10 : 0x3ffb3dc0 A11 : 0xffffffff A12 : 0x0000004c A13 : 0x00000000
0x3ffb3dc0 - async_io::reactor::Reactor::get::REACTOR
at ??:??
A14 : 0x0000004c A15 : 0x00000002 SAR : 0x0000000c EXCCAUSE: 0x0000001c
EXCVADDR: 0x00060820 LBEG : 0x40083b65 LEND : 0x40083b6d LCOUNT : 0x00000027
0x40083b65 - esp_timer_impl_get_counter_reg
at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:118
0x40083b6d - esp_timer_impl_get_counter_reg
at /opt/esp/esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:128
Backtrace: 0x400df898:0x3ffbd8e0 0x00060420:0x3ffbd910 |<-CORRUPTED
0x400df898 - async_lock::once_cell::OnceCell<T>::get_or_init_blocking
at /host/.micro-rdk-docker-caches/cargo-registry/registry/src/index.crates.io-1949cf8c6b5b557f/async-lock-3.4.0/src/once_cell.rs:516