From 3b6d812f42351532197bdf86beef3da91174be45 Mon Sep 17 00:00:00 2001 From: Aleksander Wasaznik Date: Thu, 9 Oct 2025 16:30:50 +0200 Subject: [PATCH 1/4] Bluetooth: Host: Don't call user callback from TX thread ATT is invoking user callbacks in its net_buf destroy function. It is common practice that these callbacks can block on bt_hci_cmd_alloc(). This is a deadlock when the net_buf_unref() happens inside the HCI driver, invoked from tx_processor. Blocking callbacks like this appear in our own samples. See further down about how this problem was detected. tx_processor not protect against blocking callbacks so it is de-facto forbidden. The Host should not equip net_bufs with dangerous destroy callbacks. This commit makes ATT defer its net_buf destruction and user callback invocation to the system workqueue, so that net_buf_unref is safe to call from non-blocking threads. In the case of the deadlock, the net_buf_unref() was below the tx_processor in the call stack, which (at the time of this commit) is on the system work queue, so defering it to the system work queue is preserving the existing behavior. Future improvement may be to allow the user to provide their own workqueue for ATT callbacks. This deadlock was detected because the following test was failing while moving tx_processor to the bt_taskq: tests/bsim/bluetooth/ll/throughput/tests_scripts/gatt_write.sh The above test has an ATT callback `write_cmd_cb` invokes `bt_conn_le_param_update` can block waiting for `tx_processor`. The reason it was not failing while tx_processor was on the system work queue is that the GATT API has a special non-blocking behavior when called from the system work queue. Signed-off-by: Aleksander Wasaznik --- subsys/bluetooth/host/att.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/subsys/bluetooth/host/att.c b/subsys/bluetooth/host/att.c index 66833d1cfcc53..bf011bef1214c 100644 --- a/subsys/bluetooth/host/att.c +++ b/subsys/bluetooth/host/att.c @@ -248,8 +248,13 @@ const char *bt_att_err_to_str(uint8_t att_err) } #endif /* CONFIG_BT_ATT_ERR_TO_STR */ -static void att_tx_destroy(struct net_buf *buf) +static void att_tx_destroy_work_handler(struct k_work *work); +static K_WORK_DEFINE(att_tx_destroy_work, att_tx_destroy_work_handler); +static sys_slist_t tx_destroy_queue; + +static void att_tx_destroy_work_handler(struct k_work *work) { + struct net_buf *buf = net_buf_slist_get(&tx_destroy_queue); struct bt_att_tx_meta_data *p_meta = att_get_tx_meta_data(buf); struct bt_att_tx_meta_data meta; @@ -278,6 +283,33 @@ static void att_tx_destroy(struct net_buf *buf) if (meta.opcode != 0) { att_on_sent_cb(&meta); } + + if (!sys_slist_is_empty(&tx_destroy_queue)) { + k_work_submit_to_queue(NULL, work); + } +} + +static void att_tx_destroy(struct net_buf *buf) +{ + /* We need to invoke att_on_sent_cb, which may block. We + * don't want to block in a net buf destroy callback, so we + * defer to a sensible workqueue. + * + * bt_workq cannot be used because it currently forms a + * deadlock with att_pool: bt_workq -> btt_att_recv -> + * send_err_rsp waits for att pool. + * + * We use the system work queue to preserve earlier + * behavior. The tx_processor used to run on the system work + * queue, and it could end up here: tx_processor -> + * bt_hci_send -> net_buf_unref. + * + * A possible alternative is tx_notify_workqueue_get() since + * this workqueue is processing similar "completion" events. + */ + net_buf_slist_put(&tx_destroy_queue, buf); + k_work_submit(&att_tx_destroy_work); + /* Continues in att_tx_destroy_work_handler() */ } NET_BUF_POOL_DEFINE(att_pool, CONFIG_BT_ATT_TX_COUNT, From 439b079b91a9faacc97a2b107e36edaa001ba53a Mon Sep 17 00:00:00 2001 From: Aleksander Wasaznik Date: Fri, 10 Oct 2025 12:18:56 +0200 Subject: [PATCH 2/4] Bluetooth: Samples: Reduce RAM requirement of peripheral_identity Reduce BT_MAX_CONN from 62 to 61 to make it build on integration platform qemu_cortex_m3/ti_lm3s6965 when we add bt_taskq in subsequent commit. Signed-off-by: Aleksander Wasaznik --- samples/bluetooth/peripheral_identity/prj.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/bluetooth/peripheral_identity/prj.conf b/samples/bluetooth/peripheral_identity/prj.conf index 8bd97851e3684..d7847c0f7a323 100644 --- a/samples/bluetooth/peripheral_identity/prj.conf +++ b/samples/bluetooth/peripheral_identity/prj.conf @@ -6,8 +6,8 @@ CONFIG_BT_PRIVACY=y CONFIG_BT_DEVICE_NAME="Zephyr Peripheral" CONFIG_BT_GAP_AUTO_UPDATE_CONN_PARAMS=n -CONFIG_BT_MAX_CONN=62 -CONFIG_BT_ID_MAX=62 +CONFIG_BT_MAX_CONN=61 +CONFIG_BT_ID_MAX=61 # CONFIG_BT_SMP=y # CONFIG_BT_MAX_PAIRED=62 From babc1e046a98ed0f8ab043dad67f07f77e3c9f0f Mon Sep 17 00:00:00 2001 From: Pavel Vasilyev Date: Thu, 16 Oct 2025 12:47:00 +0200 Subject: [PATCH 3/4] Bluetooth: Host: Run tx processor on its own thread When thread that TX processor is used to send commands and data to Controller is also used for sync commands sending and command buffer allocation, a deadlock happens. This thread is used to avoid such deadlocks by moving TX processor to its own dedicated thread exclusively used by tx processor only. Signed-off-by: Pavel Vasilyev Signed-off-by: Aleksander Wasaznik --- subsys/bluetooth/host/Kconfig | 26 ++++++++++++++++++++++++++ subsys/bluetooth/host/hci_core.c | 26 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/subsys/bluetooth/host/Kconfig b/subsys/bluetooth/host/Kconfig index 72deb4343dc45..a35f49fa2094e 100644 --- a/subsys/bluetooth/host/Kconfig +++ b/subsys/bluetooth/host/Kconfig @@ -132,6 +132,32 @@ config BT_DRIVER_RX_HIGH_PRIO int default 6 +config BT_TX_PROCESSOR_THREAD + # Hidden unconfigurable option + bool + # TODO: Remove default n just after Zephyr 4.3 release. This + # feature increases the RAM use by 1kB. We want to enable it + # at the least disruptive moment in the release cycle. + default n + default y if !SOC_SERIES_NRF51X + # This option is automatically selected for all platforms except nRF51 + # due to limited RAM on nRF51 devices. + # + # This thread is used to send pending HCI Commands, ACL and ISO data to + # Controller. + +if BT_TX_PROCESSOR_THREAD + +config BT_TX_PROCESSOR_THREAD_PRIO + int + default -1 + +config BT_TX_PROCESSOR_STACK_SIZE + int + default 1024 + +endif + config BT_CONN_TX_NOTIFY_WQ bool "Use a separate workqueue for connection TX notify processing [EXPERIMENTAL]" depends on BT_CONN_TX diff --git a/subsys/bluetooth/host/hci_core.c b/subsys/bluetooth/host/hci_core.c index 33306596b2ce7..95682a039ff12 100644 --- a/subsys/bluetooth/host/hci_core.c +++ b/subsys/bluetooth/host/hci_core.c @@ -5033,8 +5033,34 @@ static void tx_processor(struct k_work *item) static K_WORK_DEFINE(tx_work, tx_processor); +#if defined(CONFIG_BT_TX_PROCESSOR_THREAD) +static K_THREAD_STACK_DEFINE(bt_tx_processor_stack, CONFIG_BT_TX_PROCESSOR_STACK_SIZE); +static struct k_work_q bt_tx_processor_workq; + +__maybe_unused static int bt_tx_processor_init(void) +{ + struct k_work_queue_config cfg = {}; + + if (IS_ENABLED(CONFIG_THREAD_NAME)) { + cfg.name = "bt_tx_processor"; + } + + k_work_queue_start(&bt_tx_processor_workq, bt_tx_processor_stack, + K_THREAD_STACK_SIZEOF(bt_tx_processor_stack), + CONFIG_BT_TX_PROCESSOR_THREAD_PRIO, &cfg); + + return 0; +} + +SYS_INIT(bt_tx_processor_init, POST_KERNEL, 999); +#endif /* CONFIG_BT_TX_PROCESSOR_THREAD */ + void bt_tx_irq_raise(void) { LOG_DBG("kick TX"); +#if defined(CONFIG_BT_TX_PROCESSOR_THREAD) + k_work_submit_to_queue(&bt_tx_processor_workq, &tx_work); +#else k_work_submit(&tx_work); +#endif } From adb845774c5fbce9d5bec1d419637d4d68b1a436 Mon Sep 17 00:00:00 2001 From: Aleksander Wasaznik Date: Tue, 21 Oct 2025 16:50:58 +0200 Subject: [PATCH 4/4] Bluetooth: Host: Disable bt_hci_cmd_send_sync workaround The workaround in bt_cmd_send_sync should no longer by needed when tx_processor is not on the system work queue. Signed-off-by: Aleksander Wasaznik --- subsys/bluetooth/host/hci_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/subsys/bluetooth/host/hci_core.c b/subsys/bluetooth/host/hci_core.c index 95682a039ff12..d75438afcbe48 100644 --- a/subsys/bluetooth/host/hci_core.c +++ b/subsys/bluetooth/host/hci_core.c @@ -472,10 +472,11 @@ int bt_hci_cmd_send_sync(uint16_t opcode, struct net_buf *buf, /* TODO: disallow sending sync commands from syswq altogether */ - /* Since the commands are now processed in the syswq, we cannot suspend - * and wait. We have to send the command from the current context. + /* If the commands are processed in the syswq and we are on the + * syswq, then we cannot suspend and wait. We have to send the + * command from the current context. */ - if (k_current_get() == &k_sys_work_q.thread) { + if (!IS_ENABLED(CONFIG_BT_TX_PROCESSOR_THREAD) && k_current_get() == &k_sys_work_q.thread) { /* drain the command queue until we get to send the command of interest. */ struct net_buf *cmd = NULL;