From a64b617e1947dae2baaf879c5be4188e239c5859 Mon Sep 17 00:00:00 2001 From: Aleksander Wasaznik Date: Fri, 10 Oct 2025 15:31:28 +0200 Subject: [PATCH 1/6] Bluetooth: Host: Expose bt_workq for global Host use There are other places in the Host that would make sense to run on bt_workq. This change exposes `bt_workq_chosen` in `hci_core.h` for use in other parts of the Host. `bt_workq_chosen` is set according to the `BT_RECV_CONTEXT` choice. Signed-off-by: Aleksander Wasaznik --- subsys/bluetooth/host/hci_core.c | 11 ++++++----- subsys/bluetooth/host/hci_core.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/subsys/bluetooth/host/hci_core.c b/subsys/bluetooth/host/hci_core.c index 33306596b2ce7..a60265bfc078c 100644 --- a/subsys/bluetooth/host/hci_core.c +++ b/subsys/bluetooth/host/hci_core.c @@ -124,6 +124,9 @@ static struct k_work_q bt_workq; static K_KERNEL_STACK_DEFINE(rx_thread_stack, CONFIG_BT_RX_STACK_SIZE); #endif /* CONFIG_BT_RECV_WORKQ_BT */ +struct k_work_q *const bt_workq_chosen = + COND_CODE_1(CONFIG_BT_RECV_WORKQ_BT, (&bt_workq), (&k_sys_work_q)); + static void init_work(struct k_work *work); struct bt_dev bt_dev = { @@ -4350,13 +4353,11 @@ static void hci_event_prio(struct net_buf *buf) static void rx_queue_put(struct net_buf *buf) { + int err; + net_buf_slist_put(&bt_dev.rx_queue, buf); -#if defined(CONFIG_BT_RECV_WORKQ_SYS) - const int err = k_work_submit(&rx_work); -#elif defined(CONFIG_BT_RECV_WORKQ_BT) - const int err = k_work_submit_to_queue(&bt_workq, &rx_work); -#endif /* CONFIG_BT_RECV_WORKQ_SYS */ + err = k_work_submit_to_queue(bt_workq_chosen, &rx_work); if (err < 0) { LOG_ERR("Could not submit rx_work: %d", err); } diff --git a/subsys/bluetooth/host/hci_core.h b/subsys/bluetooth/host/hci_core.h index 315db2d388846..28b14763f390f 100644 --- a/subsys/bluetooth/host/hci_core.h +++ b/subsys/bluetooth/host/hci_core.h @@ -442,6 +442,7 @@ struct bt_dev { #endif }; +extern struct k_work_q *const bt_workq_chosen; extern struct 
bt_dev bt_dev; extern const struct bt_conn_auth_cb *bt_auth; extern sys_slist_t bt_auth_info_cbs; From 86b156b8496c8a2f41cb9d011d8deb6cf96a0e95 Mon Sep 17 00:00:00 2001 From: Aleksander Wasaznik Date: Thu, 9 Oct 2025 16:30:50 +0200 Subject: [PATCH 2/6] Bluetooth: Host: Don't call user callback from TX thread ATT is invoking user callbacks in its net_buf destroy function. It is common practice for these callbacks to block on bt_hci_cmd_alloc(), so we must consider these callbacks as blocking. This causes a deadlock when the net_buf_unref() happens inside the HCI driver while the driver is invoked from tx_processor. Blocking callbacks like this appear in our own samples. See further down for how this problem was detected. Currently, tx_processor does not protect against blocking callbacks, so they are de facto forbidden. The Host should not equip net_bufs with dangerous destroy callbacks. This commit makes ATT defer its net_buf destruction and user callback invocation to `bt_workq_chosen` (the dedicated bt_workq or the system workqueue, depending on the `BT_RECV_CONTEXT` choice), so that net_buf_unref is safe to call from non-blocking threads. Unsafe code is banished to that workqueue's wild west. A future improvement may be to allow the user to provide their own workqueue for ATT callbacks. This deadlock was detected because the following test was failing while moving tx_processor to the bt_taskq: tests/bsim/bluetooth/ll/throughput/tests_scripts/gatt_write.sh The above test has an ATT callback `write_cmd_cb` that invokes `bt_conn_le_param_update`, which can block waiting for `tx_processor`.
Signed-off-by: Aleksander Wasaznik --- subsys/bluetooth/host/att.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/subsys/bluetooth/host/att.c b/subsys/bluetooth/host/att.c index 66833d1cfcc53..7e7871c663a13 100644 --- a/subsys/bluetooth/host/att.c +++ b/subsys/bluetooth/host/att.c @@ -248,8 +248,13 @@ const char *bt_att_err_to_str(uint8_t att_err) } #endif /* CONFIG_BT_ATT_ERR_TO_STR */ -static void att_tx_destroy(struct net_buf *buf) +static void att_tx_destroy_work_handler(struct k_work *work); +static K_WORK_DEFINE(att_tx_destroy_work, att_tx_destroy_work_handler); +static sys_slist_t tx_destroy_queue; + +static void att_tx_destroy_work_handler(struct k_work *work) { + struct net_buf *buf = net_buf_slist_get(&tx_destroy_queue); struct bt_att_tx_meta_data *p_meta = att_get_tx_meta_data(buf); struct bt_att_tx_meta_data meta; @@ -278,6 +283,17 @@ static void att_tx_destroy(struct net_buf *buf) if (meta.opcode != 0) { att_on_sent_cb(&meta); } + + if (!sys_slist_is_empty(&tx_destroy_queue)) { + k_work_submit_to_queue(bt_workq_chosen, &att_tx_destroy_work); + } +} + +static void att_tx_destroy(struct net_buf *buf) +{ + /* We need to invoke `att_on_sent_cb` which may block. Defer to bt_workq. */ + net_buf_slist_put(&tx_destroy_queue, buf); + k_work_submit_to_queue(bt_workq_chosen, &att_tx_destroy_work); /* att_tx_destroy_work_handler */ } NET_BUF_POOL_DEFINE(att_pool, CONFIG_BT_ATT_TX_COUNT, From 3635cd2fd7c27c578d96cf14179b649dedbd486e Mon Sep 17 00:00:00 2001 From: Aleksander Wasaznik Date: Fri, 10 Oct 2025 12:18:56 +0200 Subject: [PATCH 3/6] Bluetooth: Samples: Reduce RAM requirement of peripheral_identity Reduce BT_MAX_CONN from 62 to 61 to make it build on integration platform qemu_cortex_m3/ti_lm3s6965 when we add bt_taskq in subsequent commit. 
Signed-off-by: Aleksander Wasaznik --- samples/bluetooth/peripheral_identity/prj.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/bluetooth/peripheral_identity/prj.conf b/samples/bluetooth/peripheral_identity/prj.conf index 8bd97851e3684..d7847c0f7a323 100644 --- a/samples/bluetooth/peripheral_identity/prj.conf +++ b/samples/bluetooth/peripheral_identity/prj.conf @@ -6,8 +6,8 @@ CONFIG_BT_PRIVACY=y CONFIG_BT_DEVICE_NAME="Zephyr Peripheral" CONFIG_BT_GAP_AUTO_UPDATE_CONN_PARAMS=n -CONFIG_BT_MAX_CONN=62 -CONFIG_BT_ID_MAX=62 +CONFIG_BT_MAX_CONN=61 +CONFIG_BT_ID_MAX=61 # CONFIG_BT_SMP=y # CONFIG_BT_MAX_PAIRED=62 From 6fcafbe7df778b6b7266d8d2a96a9d9cb5d8cf56 Mon Sep 17 00:00:00 2001 From: Aleksander Wasaznik Date: Fri, 12 Sep 2025 17:25:39 +0200 Subject: [PATCH 4/6] Bluetooth: Host: Add bt_taskq workqueue for quick non-blocking tasks Add a new workqueue bt_taskq specifically designed for quick non-blocking work items in the Bluetooth subsystem. This workqueue is always available and does not depend on any Kconfig option. 
Signed-off-by: Aleksander Wasaznik --- subsys/bluetooth/host/CMakeLists.txt | 1 + subsys/bluetooth/host/Kconfig | 1 + subsys/bluetooth/host/Kconfig.bt_taskq | 58 ++++++++++++++++++++++++++ subsys/bluetooth/host/bt_taskq.c | 42 +++++++++++++++++++ subsys/bluetooth/host/bt_taskq.h | 40 ++++++++++++++++++ 5 files changed, 142 insertions(+) create mode 100644 subsys/bluetooth/host/Kconfig.bt_taskq create mode 100644 subsys/bluetooth/host/bt_taskq.c create mode 100644 subsys/bluetooth/host/bt_taskq.h diff --git a/subsys/bluetooth/host/CMakeLists.txt b/subsys/bluetooth/host/CMakeLists.txt index 0911c22c8f866..eee45dc111301 100644 --- a/subsys/bluetooth/host/CMakeLists.txt +++ b/subsys/bluetooth/host/CMakeLists.txt @@ -14,6 +14,7 @@ zephyr_library_sources_ifdef(CONFIG_BT_LONG_WQ long_wq.c) if(CONFIG_BT_HCI_HOST) zephyr_library_sources( + bt_taskq.c uuid.c addr.c buf.c diff --git a/subsys/bluetooth/host/Kconfig b/subsys/bluetooth/host/Kconfig index 72deb4343dc45..3c8257b0d5e3f 100644 --- a/subsys/bluetooth/host/Kconfig +++ b/subsys/bluetooth/host/Kconfig @@ -165,6 +165,7 @@ menu "Bluetooth Host" if BT_HCI_HOST +rsource "Kconfig.bt_taskq" rsource "../mesh/Kconfig" rsource "../audio/Kconfig" diff --git a/subsys/bluetooth/host/Kconfig.bt_taskq b/subsys/bluetooth/host/Kconfig.bt_taskq new file mode 100644 index 0000000000000..ed549253d0a75 --- /dev/null +++ b/subsys/bluetooth/host/Kconfig.bt_taskq @@ -0,0 +1,58 @@ +# bt_taskq configuration options + +# Copyright (c) 2025 Nordic Semiconductor +# SPDX-License-Identifier: Apache-2.0 + +choice BT_TASKQ_CONTEXT + prompt "bt_taskq thread selection" + # nRF51 is too small to have a dedicated thread + default BT_TASKQ_SYSTEM_WORKQUEUE if SOC_SERIES_NRF51X + default BT_TASKQ_DEDICATED + help + Selects in which context the bt_taskq runs. + + bt_taskq work is quick and non-blocking and must not be + blocked by other work. It should be on a work queue that + is exclusively for non-blocking work. 
+ +config BT_TASKQ_DEDICATED + bool "Dedicated thread" + help + When this option is selected, the bt_taskq runs on a + dedicated thread. This is the default and safe option. + +config BT_TASKQ_SYSTEM_WORKQUEUE + bool "System workqueue" + help + When this option is selected, the bt_taskq is the system + workqueue. + + WARNING: This is safe only if there is no blocking work on + the system workqueue. + + This is currently NEVER SAFE to use as the Host itself + puts blocking work on the system workqueue. For now, this + option exists for users that need to free up RAM by not + having an extra thread and are willing to accept the risk + of deadlocks. When using this option, it is advised to + have a watchdog to recover from deadlocks. Risk of + deadlocks can be mitigated by being mindful of buffers and + whole-system analysis. + +endchoice + +config BT_TASKQ_STACK_SIZE_WITH_PROMPT + bool "bt_taskq thread stack size override" + depends on BT_TASKQ_DEDICATED + +config BT_TASKQ_STACK_SIZE + int + default 1024 + prompt "bt_taskq thread stack size" if BT_TASKQ_STACK_SIZE_WITH_PROMPT + +config BT_TASKQ_THREAD_PRIO + # Hidden option + int + # -1 is the least urgent cooperative priority. + # tx_processor() needs a cooperative thread for now. 
+ default -1 diff --git a/subsys/bluetooth/host/bt_taskq.c b/subsys/bluetooth/host/bt_taskq.c new file mode 100644 index 0000000000000..5b16e1535b43d --- /dev/null +++ b/subsys/bluetooth/host/bt_taskq.c @@ -0,0 +1,42 @@ +/* bt_taskq.c - Workqueue for quick non-blocking Bluetooth tasks */ + +/* + * Copyright (c) 2025 Nordic Semiconductor ASA + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include + +static K_THREAD_STACK_DEFINE(bt_taskq_stack, CONFIG_BT_TASKQ_STACK_SIZE); +static struct k_work_q bt_taskq; + +__maybe_unused static int bt_taskq_init(void) +{ + struct k_work_queue_config cfg = {}; + + if (IS_ENABLED(CONFIG_THREAD_NAME)) { + cfg.name = "bt_taskq"; + } + + k_work_queue_start(&bt_taskq, bt_taskq_stack, K_THREAD_STACK_SIZEOF(bt_taskq_stack), + CONFIG_BT_TASKQ_THREAD_PRIO, &cfg); + + return 0; +} + +#if defined(CONFIG_BT_TASKQ_DEDICATED) +/* The init priority is set to POST_KERNEL 999, the last level + * before APPLICATION. + */ +SYS_INIT(bt_taskq_init, POST_KERNEL, 999); +#endif /* CONFIG_BT_TASKQ_DEDICATED */ + +/* Exports */ +struct k_work_q *const bt_taskq_chosen = + COND_CODE_1(CONFIG_BT_TASKQ_DEDICATED, (&bt_taskq), (&k_sys_work_q)); diff --git a/subsys/bluetooth/host/bt_taskq.h b/subsys/bluetooth/host/bt_taskq.h new file mode 100644 index 0000000000000..aa4421755a8cf --- /dev/null +++ b/subsys/bluetooth/host/bt_taskq.h @@ -0,0 +1,40 @@ +/* bt_taskq.h - Workqueue for quick non-blocking Bluetooth tasks */ + +/* + * Copyright (c) 2025 Nordic Semiconductor ASA + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +/** + * @brief Bluetooth task workqueue + * + * bt_taskq is a workqueue intended for quick non-blocking work + * items ("tasks") in the Bluetooth subsystem. This workqueue + * must always exist and is not controlled by any Kconfig + * option. + * + * Blocking means "waiting for something while running". A + * task is NOT allowed to block. 
If a task needs to "wait", it + * should instead return immediately and schedule itself to run + * later. + * + * Work items submitted to this queue should be: + * - Quick to execute (non-blocking). + * - Not perform long-running operations. + * - Not block. + * + * @warning Non-blocking violation pitfalls: + * - net_buf_unref() on a foreign buffer could have a blocking + * destroy callback + * - Any user-defined callback might be blocking + * - Avoid any operations that could sleep or block the thread + * + * Use bt_long_wq for long-running or potentially blocking + * operations instead. + * + * Available in APPLICATION initialization level and later. + */ +extern struct k_work_q *const bt_taskq_chosen; From 8de7ceb58ea92fabacc44752d61caebff778ccc3 Mon Sep 17 00:00:00 2001 From: Aleksander Wasaznik Date: Tue, 23 Sep 2025 13:55:33 +0200 Subject: [PATCH 5/6] Bluetooth: Host: Move tx_processor to bt_taskq It's not safe for the tx_processor to share the system workqueue with work items that block the thread until tx_processor runs. This is a deadlock. The Bluetooth Host itself performs these operations, usually involving bt_hci_cmd_alloc(), on the system workqueue. This change effectively gives tx_processor its own thread, like the BT TX thread that used to exist. But, this time the thread is intended to be shared with any other non-blocking Bluetooth Host tasks. Per the bt_taskq rules, tx_processor is supposed to be non-blocking and only have code under our control on the thread stack. Unfortunately, this is not entirely true currently. But we consider it close enough for now and will ensure it starts adhering to the rules in the future. Examples of problems: - The tx_processor invokes bt_hci_send(), driver code which has no rules limiting what it can do on our thread. - The tx_processor invokes net_buf_unref() on stack-external net_bufs, which executes user code on our thread.
Signed-off-by: Aleksander Wasaznik --- subsys/bluetooth/host/hci_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subsys/bluetooth/host/hci_core.c b/subsys/bluetooth/host/hci_core.c index a60265bfc078c..ba818e3f0ce32 100644 --- a/subsys/bluetooth/host/hci_core.c +++ b/subsys/bluetooth/host/hci_core.c @@ -48,6 +48,7 @@ #include "addr_internal.h" #include "adv.h" +#include "bt_taskq.h" #include "common/hci_common_internal.h" #include "common/bt_str.h" #include "common/rpa.h" @@ -5037,5 +5038,5 @@ static K_WORK_DEFINE(tx_work, tx_processor); void bt_tx_irq_raise(void) { LOG_DBG("kick TX"); - k_work_submit(&tx_work); + k_work_submit_to_queue(bt_taskq_chosen, &tx_work); } From 01bf284e8b6ca4b4ffd5fdbd3479fbb7cc0ba822 Mon Sep 17 00:00:00 2001 From: Aleksander Wasaznik Date: Fri, 10 Oct 2025 18:46:07 +0200 Subject: [PATCH 6/6] Bluetooth: Host: Conditional bt_hci_cmd_send_sync workaround This commit disables the deadlock workaround in bt_hci_cmd_send_sync() when it is not needed, i.e. when tx_processor runs on a dedicated bt_taskq thread (CONFIG_BT_TASKQ_DEDICATED) and not on the system workqueue. Signed-off-by: Aleksander Wasaznik --- subsys/bluetooth/host/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subsys/bluetooth/host/hci_core.c b/subsys/bluetooth/host/hci_core.c index ba818e3f0ce32..4561148ab7de4 100644 --- a/subsys/bluetooth/host/hci_core.c +++ b/subsys/bluetooth/host/hci_core.c @@ -479,7 +479,7 @@ int bt_hci_cmd_send_sync(uint16_t opcode, struct net_buf *buf, /* Since the commands are now processed in the syswq, we cannot suspend * and wait. We have to send the command from the current context. */ - if (k_current_get() == &k_sys_work_q.thread) { + if (!IS_ENABLED(CONFIG_BT_TASKQ_DEDICATED) && k_current_get() == &k_sys_work_q.thread) { /* drain the command queue until we get to send the command of interest. */ struct net_buf *cmd = NULL;