Skip to content

Commit d8689cd

Browse files
Johan Hedberg (jhedberg)
authored and committed
Bluetooth: host: Fix deadlocks with pending TX packet handling
This is a moderate redesign of the pending TX packet handling that aims to eliminate potential deadlocks between the TX thread and the system workqueue thread. The main changes are: - TX context (bt_conn_tx) is allocated during buffer allocation, i.e. not in the TX thread. - We don't allocate a TX context unless there's an associated callback. When there's no callback simple integer counters are used for tracking. - The TX thread is no longer responsible for TX callbacks or scheduling of TX callbacks. Instead, the callbacks get directly scheduled (k_work_submit) from the RX priority thread. - CONFIG_BT_CONN_TX_MAX defaults to CONFIG_BT_L2CAP_TX_BUF_COUNT, and in most cases wont need changing. The value now only indicates how many pending packets with a callback are possible. Signed-off-by: Johan Hedberg <[email protected]>
1 parent 28f8947 commit d8689cd

File tree

6 files changed

+136
-157
lines changed

6 files changed

+136
-157
lines changed

subsys/bluetooth/host/Kconfig

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -240,13 +240,13 @@ config BT_ACL_RX_COUNT
240240
endif # BT_HCI_ACL_FLOW_CONTROL
241241

242242
config BT_CONN_TX_MAX
243-
int "Maximum number of pending TX buffers"
244-
default BT_CTLR_TX_BUFFERS if BT_CTLR
245-
default 7
246-
range 1 128
243+
int "Maximum number of pending TX buffers with a callback"
244+
default BT_L2CAP_TX_BUF_COUNT
245+
range BT_L2CAP_TX_BUF_COUNT 255
247246
help
248-
Maximum number of pending TX buffers that have not yet
249-
been acknowledged by the controller.
247+
Maximum number of pending TX buffers that have an associated
248+
callback. Normally this can be left to the default value, which
249+
is equal to the number of TX buffers in the stack-internal pool.
250250

251251
config BT_AUTO_PHY_UPDATE
252252
bool "Auto-initiate PHY Update Procedure"

subsys/bluetooth/host/Kconfig.l2cap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ endif # BT_HCI_ACL_FLOW_CONTROL
1919

2020
config BT_L2CAP_TX_BUF_COUNT
2121
int "Number of L2CAP TX buffers"
22+
default BT_CTLR_TX_BUFFERS if BT_CTLR
2223
default 3
2324
range 2 255
2425
help

subsys/bluetooth/host/conn.c

Lines changed: 98 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,11 @@ const struct bt_conn_auth_cb *bt_auth;
6868
static struct bt_conn conns[CONFIG_BT_MAX_CONN];
6969
static struct bt_conn_cb *callback_list;
7070

71-
#define conn_tx(buf) ((struct bt_conn_tx_data *)net_buf_user_data(buf))
71+
struct tx_meta {
72+
struct bt_conn_tx *tx;
73+
};
74+
75+
#define tx_data(buf) ((struct tx_meta *)net_buf_user_data(buf))
7276

7377
static struct bt_conn_tx conn_tx[CONFIG_BT_CONN_TX_MAX];
7478
K_FIFO_DEFINE(free_tx);
@@ -307,31 +311,34 @@ static void tx_free(struct bt_conn_tx *tx)
307311
tx->conn = NULL;
308312
}
309313

310-
tx->data.cb = NULL;
311-
tx->data.user_data = NULL;
314+
tx->cb = NULL;
315+
tx->user_data = NULL;
316+
tx->pending_no_cb = 0U;
312317
k_fifo_put(&free_tx, tx);
313318
}
314319

315320
static void tx_notify_cb(struct k_work *work)
316321
{
317322
struct bt_conn_tx *tx = CONTAINER_OF(work, struct bt_conn_tx, work);
318323
struct bt_conn *conn;
319-
struct bt_conn_tx_data data;
324+
bt_conn_tx_cb_t cb;
325+
void *user_data;
320326

321-
BT_DBG("tx %p conn %p cb %p user_data %p", tx, tx->conn, tx->data.cb,
322-
tx->data.user_data);
327+
BT_DBG("tx %p conn %p cb %p user_data %p", tx, tx->conn, tx->cb,
328+
tx->user_data);
323329

324330
/* Copy over the params */
325331
conn = bt_conn_ref(tx->conn);
326-
data = tx->data;
332+
cb = tx->cb;
333+
user_data = tx->user_data;
327334

328335
/* Free up TX notify since there may be user waiting */
329336
tx_free(tx);
330337

331338
/* Run the callback, at this point it should be safe to allocate new
332339
* buffers since the TX should have been unblocked by tx_free.
333340
*/
334-
data.cb(conn, data.user_data);
341+
cb(conn, user_data);
335342

336343
bt_conn_unref(conn);
337344
}
@@ -1188,130 +1195,60 @@ void bt_conn_recv(struct bt_conn *conn, struct net_buf *buf, u8_t flags)
11881195
bt_l2cap_recv(conn, buf);
11891196
}
11901197

1191-
int bt_conn_send_cb(struct bt_conn *conn, struct net_buf *buf,
1192-
bt_conn_tx_cb_t cb, void *user_data)
1198+
static struct bt_conn_tx *conn_tx_alloc(void)
11931199
{
1194-
BT_DBG("conn handle %u buf len %u cb %p user_data %p", conn->handle,
1195-
buf->len, cb, user_data);
1196-
1197-
if (conn->state != BT_CONN_CONNECTED) {
1198-
BT_ERR("not connected!");
1199-
net_buf_unref(buf);
1200-
return -ENOTCONN;
1201-
}
1202-
1203-
conn_tx(buf)->cb = cb;
1204-
conn_tx(buf)->user_data = user_data;
1200+
if (IS_ENABLED(CONFIG_BT_DEBUG_CONN)) {
1201+
struct bt_conn_tx *tx = k_fifo_get(&free_tx, K_NO_WAIT);
12051202

1206-
net_buf_put(&conn->tx_queue, buf);
1207-
return 0;
1208-
}
1203+
if (!tx) {
1204+
BT_WARN("Unable to get a free conn_tx, yielding...");
1205+
tx = k_fifo_get(&free_tx, K_FOREVER);
1206+
}
12091207

1210-
static bool conn_tx_internal(bt_conn_tx_cb_t cb)
1211-
{
1212-
/* Only functions which are known to not block on anything that may
1213-
* depend on the TX thread itself, thereby causing a deadlock, are
1214-
* considered internal in this context.
1215-
* This means that e.g. SMP callbacks cannot be considered internal,
1216-
* since they may perform application callbacks and dependency on the
1217-
* TX thread is then out of our control.
1218-
*/
1219-
if (cb == att_pdu_sent || cb == att_cfm_sent || cb == att_rsp_sent ||
1220-
#if defined(CONFIG_BT_L2CAP_DYNAMIC_CHANNEL)
1221-
cb == l2cap_chan_sdu_sent ||
1222-
#endif /* CONFIG_BT_L2CAP_DYNAMIC_CHANNEL */
1223-
cb == att_req_sent) {
1224-
return true;
1208+
return tx;
12251209
}
12261210

1227-
return false;
1211+
return k_fifo_get(&free_tx, K_FOREVER);
12281212
}
12291213

1230-
void bt_conn_notify_tx(struct bt_conn *conn)
1214+
int bt_conn_send_cb(struct bt_conn *conn, struct net_buf *buf,
1215+
bt_conn_tx_cb_t cb, void *user_data)
12311216
{
12321217
struct bt_conn_tx *tx;
12331218

1234-
BT_DBG("conn %p", conn);
1235-
1236-
while ((tx = k_fifo_get(&conn->tx_notify, K_NO_WAIT))) {
1237-
BT_DBG("cb %p user_data %p", tx->data.cb, tx->data.user_data);
1238-
1239-
/* Only submit if there is a callback set */
1240-
if (tx->data.cb) {
1241-
/* Submit using TX thread if internal callback */
1242-
if (conn_tx_internal(tx->data.cb)) {
1243-
tx_notify_cb(&tx->work);
1244-
} else {
1245-
k_work_submit(&tx->work);
1246-
}
1247-
} else {
1248-
tx_free(tx);
1249-
}
1250-
}
1251-
}
1252-
1253-
static void notify_tx(void)
1254-
{
1255-
int i;
1256-
1257-
for (i = 0; i < ARRAY_SIZE(conns); i++) {
1258-
if (!atomic_get(&conns[i].ref)) {
1259-
continue;
1260-
}
1219+
BT_DBG("conn handle %u buf len %u cb %p user_data %p", conn->handle,
1220+
buf->len, cb, user_data);
12611221

1262-
if (conns[i].state == BT_CONN_CONNECTED ||
1263-
conns[i].state == BT_CONN_DISCONNECT) {
1264-
bt_conn_notify_tx(&conns[i]);
1265-
}
1222+
if (conn->state != BT_CONN_CONNECTED) {
1223+
BT_ERR("not connected!");
1224+
net_buf_unref(buf);
1225+
return -ENOTCONN;
12661226
}
1267-
}
12681227

1269-
static struct bt_conn_tx *add_pending_tx(struct bt_conn *conn,
1270-
bt_conn_tx_cb_t cb, void *user_data)
1271-
{
1272-
struct bt_conn_tx *tx;
1273-
unsigned int key;
1228+
if (cb) {
1229+
tx = conn_tx_alloc();
1230+
tx->cb = cb;
1231+
tx->user_data = user_data;
1232+
tx->conn = bt_conn_ref(conn);
1233+
tx->pending_no_cb = 0U;
1234+
k_work_init(&tx->work, tx_notify_cb);
12741235

1275-
BT_DBG("conn %p cb %p user_data %p", conn, cb, user_data);
1276-
1277-
if (IS_ENABLED(CONFIG_BT_DEBUG_CONN)) {
1278-
tx = k_fifo_get(&free_tx, K_NO_WAIT);
1279-
if (!tx) {
1280-
BT_WARN("Unable to get a free conn_tx, yielding...");
1281-
tx = k_fifo_get(&free_tx, K_FOREVER);
1282-
}
1236+
tx_data(buf)->tx = tx;
12831237
} else {
1284-
tx = k_fifo_get(&free_tx, K_FOREVER);
1238+
tx_data(buf)->tx = NULL;
12851239
}
12861240

1287-
tx->conn = bt_conn_ref(conn);
1288-
k_work_init(&tx->work, tx_notify_cb);
1289-
tx->data.cb = cb;
1290-
tx->data.user_data = user_data;
1291-
1292-
key = irq_lock();
1293-
sys_slist_append(&conn->tx_pending, &tx->node);
1294-
irq_unlock(key);
1295-
1296-
return tx;
1297-
}
1298-
1299-
static void remove_pending_tx(struct bt_conn *conn, struct bt_conn_tx *tx)
1300-
{
1301-
unsigned int key;
1302-
1303-
key = irq_lock();
1304-
sys_slist_find_and_remove(&conn->tx_pending, &tx->node);
1305-
irq_unlock(key);
1306-
1307-
tx_free(tx);
1241+
net_buf_put(&conn->tx_queue, buf);
1242+
return 0;
13081243
}
13091244

13101245
static bool send_frag(struct bt_conn *conn, struct net_buf *buf, u8_t flags,
13111246
bool always_consume)
13121247
{
1248+
struct bt_conn_tx *tx = tx_data(buf)->tx;
13131249
struct bt_hci_acl_hdr *hdr;
1314-
struct bt_conn_tx *tx;
1250+
u32_t *pending_no_cb;
1251+
unsigned int key;
13151252
int err;
13161253

13171254
BT_DBG("conn %p buf %p len %u flags 0x%02x", conn, buf, buf->len,
@@ -1320,9 +1257,6 @@ static bool send_frag(struct bt_conn *conn, struct net_buf *buf, u8_t flags,
13201257
/* Wait until the controller can accept ACL packets */
13211258
k_sem_take(bt_conn_get_pkts(conn), K_FOREVER);
13221259

1323-
/* Make sure we notify and free up any pending tx contexts */
1324-
notify_tx();
1325-
13261260
/* Check for disconnection while waiting for pkts_sem */
13271261
if (conn->state != BT_CONN_CONNECTED) {
13281262
goto fail;
@@ -1333,21 +1267,48 @@ static bool send_frag(struct bt_conn *conn, struct net_buf *buf, u8_t flags,
13331267
hdr->len = sys_cpu_to_le16(buf->len - sizeof(*hdr));
13341268

13351269
/* Add to pending, it must be done before bt_buf_set_type */
1336-
tx = add_pending_tx(conn, conn_tx(buf)->cb, conn_tx(buf)->user_data);
1270+
key = irq_lock();
1271+
if (tx) {
1272+
sys_slist_append(&conn->tx_pending, &tx->node);
1273+
} else {
1274+
struct bt_conn_tx *tail_tx;
1275+
1276+
tail_tx = (void *)sys_slist_peek_tail(&conn->tx_pending);
1277+
if (tail_tx) {
1278+
pending_no_cb = &tail_tx->pending_no_cb;
1279+
} else {
1280+
pending_no_cb = &conn->pending_no_cb;
1281+
}
1282+
1283+
(*pending_no_cb)++;
1284+
}
1285+
irq_unlock(key);
13371286

13381287
bt_buf_set_type(buf, BT_BUF_ACL_OUT);
13391288

13401289
err = bt_send(buf);
13411290
if (err) {
13421291
BT_ERR("Unable to send to driver (err %d)", err);
1343-
remove_pending_tx(conn, tx);
1292+
key = irq_lock();
1293+
/* Roll back the pending TX info */
1294+
if (tx) {
1295+
sys_slist_find_and_remove(&conn->tx_pending, &tx->node);
1296+
} else {
1297+
__ASSERT_NO_MSG(*pending_no_cb > 0);
1298+
(*pending_no_cb)--;
1299+
}
1300+
irq_unlock(key);
13441301
goto fail;
13451302
}
13461303

13471304
return true;
13481305

13491306
fail:
13501307
k_sem_give(bt_conn_get_pkts(conn));
1308+
if (tx) {
1309+
tx_free(tx);
1310+
}
1311+
13511312
if (always_consume) {
13521313
net_buf_unref(buf);
13531314
}
@@ -1382,8 +1343,7 @@ static struct net_buf *create_frag(struct bt_conn *conn, struct net_buf *buf)
13821343
}
13831344

13841345
/* Fragments never have a TX completion callback */
1385-
conn_tx(frag)->cb = NULL;
1386-
conn_tx(frag)->user_data = NULL;
1346+
tx_data(frag)->tx = NULL;
13871347

13881348
frag_len = MIN(conn_mtu(conn), net_buf_tailroom(frag));
13891349

@@ -1441,12 +1401,15 @@ static void conn_cleanup(struct bt_conn *conn)
14411401

14421402
/* Give back any allocated buffers */
14431403
while ((buf = net_buf_get(&conn->tx_queue, K_NO_WAIT))) {
1404+
if (tx_data(buf)->tx) {
1405+
tx_free(tx_data(buf)->tx);
1406+
}
1407+
14441408
net_buf_unref(buf);
14451409
}
14461410

14471411
__ASSERT(sys_slist_is_empty(&conn->tx_pending), "Pending TX packets");
1448-
1449-
bt_conn_notify_tx(conn);
1412+
__ASSERT_NO_MSG(conn->pending_no_cb == 0);
14501413

14511414
bt_conn_reset_rx_state(conn);
14521415

@@ -1482,12 +1445,6 @@ int bt_conn_prepare_events(struct k_poll_event events[])
14821445

14831446
BT_DBG("Adding conn %p to poll list", conn);
14841447

1485-
k_poll_event_init(&events[ev_count],
1486-
K_POLL_TYPE_FIFO_DATA_AVAILABLE,
1487-
K_POLL_MODE_NOTIFY_ONLY,
1488-
&conn->tx_notify);
1489-
events[ev_count++].tag = BT_EVENT_CONN_TX_NOTIFY;
1490-
14911448
k_poll_event_init(&events[ev_count],
14921449
K_POLL_TYPE_FIFO_DATA_AVAILABLE,
14931450
K_POLL_MODE_NOTIFY_ONLY,
@@ -1544,18 +1501,34 @@ static void process_unack_tx(struct bt_conn *conn)
15441501
{
15451502
/* Return any unacknowledged packets */
15461503
while (1) {
1504+
struct bt_conn_tx *tx;
15471505
sys_snode_t *node;
15481506
unsigned int key;
15491507

15501508
key = irq_lock();
1509+
1510+
if (conn->pending_no_cb) {
1511+
conn->pending_no_cb--;
1512+
irq_unlock(key);
1513+
k_sem_give(bt_conn_get_pkts(conn));
1514+
continue;
1515+
}
1516+
15511517
node = sys_slist_get(&conn->tx_pending);
15521518
irq_unlock(key);
15531519

15541520
if (!node) {
15551521
break;
15561522
}
15571523

1558-
tx_free(CONTAINER_OF(node, struct bt_conn_tx, node));
1524+
tx = CONTAINER_OF(node, struct bt_conn_tx, node);
1525+
1526+
key = irq_lock();
1527+
conn->pending_no_cb = tx->pending_no_cb;
1528+
tx->pending_no_cb = 0U;
1529+
irq_unlock(key);
1530+
1531+
tx_free(tx);
15591532

15601533
k_sem_give(bt_conn_get_pkts(conn));
15611534
}
@@ -1602,7 +1575,6 @@ void bt_conn_set_state(struct bt_conn *conn, bt_conn_state_t state)
16021575
break;
16031576
}
16041577
k_fifo_init(&conn->tx_queue);
1605-
k_fifo_init(&conn->tx_notify);
16061578
k_poll_signal_raise(&conn_change, 0);
16071579

16081580
sys_slist_init(&conn->channels);

0 commit comments

Comments (0)