Skip to content

Commit edde06f

Browse files
authored
Avoid deadlock between transport and transaction (#4453)
1 parent b7cfd79 commit edde06f

File tree

2 files changed

+88
-14
lines changed

2 files changed

+88
-14
lines changed

pjsip/include/pjsip/sip_transaction.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ struct pjsip_transaction
141141
int retransmit_count;/**< Retransmission count. */
142142
pj_timer_entry retransmit_timer;/**< Retransmit timer. */
143143
pj_timer_entry timeout_timer; /**< Timeout timer. */
144+
pj_timer_entry misc_timer; /**< Miscellaneous timer. */
144145

145146
/** Module specific data. */
146147
void *mod_data[PJSIP_MAX_MODULE];

pjsip/src/pjsip/sip_transaction.c

Lines changed: 87 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ static int max_retrans_count = -1;
140140
#define TRANSPORT_ERR_TIMER 3
141141
#define TRANSPORT_DISC_TIMER 4
142142
#define TERMINATE_TIMER 5
143+
#define TRANSPORT_CB_TIMER 6
143144

144145
/* Flags for tsx_set_state() */
145146
enum
@@ -2265,23 +2266,21 @@ static void send_msg_callback( pjsip_send_state *send_state,
22652266
}
22662267

22672268

2268-
/* Transport callback. */
2269-
static void transport_callback(void *token, pjsip_tx_data *tdata,
2270-
pj_ssize_t sent)
2271-
{
2272-
pjsip_transaction *tsx = (pjsip_transaction*) token;
2269+
/* Transport callback parameter. */
2270+
struct tp_cb_param {
2271+
pjsip_transaction* tsx;
2272+
pjsip_tx_data* tdata;
2273+
pj_ssize_t sent;
2274+
};
22732275

2274-
/* Check if the transaction layer has been shutdown. */
2275-
if (mod_tsx_layer.mod.id < 0)
2276-
return;
22772276

2278-
/* In other circumstances, locking tsx->grp_lock AFTER transport mutex
2279-
* will introduce deadlock if another thread is currently sending a
2280-
* SIP message to the transport. But this should be safe as there should
2281-
* be no way this callback could be called while another thread is
2282-
* sending a message.
2283-
*/
2277+
/* Transport callback actual implementation. */
2278+
static void transport_callback_impl(pjsip_transaction *tsx,
2279+
pjsip_tx_data* tdata,
2280+
pj_ssize_t sent)
2281+
{
22842282
pj_grp_lock_acquire(tsx->grp_lock);
2283+
22852284
tsx->transport_flag &= ~(TSX_HAS_PENDING_TRANSPORT);
22862285

22872286
if (sent > 0 || tsx->role == PJSIP_ROLE_UAS) {
@@ -2299,6 +2298,7 @@ static void transport_callback(void *token, pjsip_tx_data *tdata,
22992298
tsx_set_state( tsx, PJSIP_TSX_STATE_DESTROYED,
23002299
PJSIP_EVENT_UNKNOWN, NULL, 0 );
23012300
pj_grp_lock_release(tsx->grp_lock);
2301+
pj_grp_lock_dec_ref(tsx->grp_lock);
23022302
return;
23032303
}
23042304

@@ -2354,6 +2354,79 @@ static void transport_callback(void *token, pjsip_tx_data *tdata,
23542354
}
23552355

23562356

2357+
/* Timer callback for transport callback.
2358+
* This is currently only used to avoid deadlock due to inversed locking order
2359+
* between transport and transaction.
2360+
*/
2361+
static void tsx_misc_timer_callback(pj_timer_heap_t *theap,
2362+
pj_timer_entry *entry)
2363+
{
2364+
PJ_UNUSED_ARG(theap);
2365+
2366+
if (entry->id == TRANSPORT_CB_TIMER) {
2367+
struct tp_cb_param* param = (struct tp_cb_param*)entry->user_data;
2368+
2369+
/* Check if the transaction layer has been shutdown. */
2370+
if (mod_tsx_layer.mod.id >= 0) {
2371+
/* Call transport callback implementation */
2372+
transport_callback_impl(param->tsx, param->tdata, param->sent);
2373+
}
2374+
2375+
/* Release tdata */
2376+
pjsip_tx_data_dec_ref(param->tdata);
2377+
}
2378+
}
2379+
2380+
2381+
/* Transport callback. */
2382+
static void transport_callback(void *token, pjsip_tx_data *tdata,
2383+
pj_ssize_t sent)
2384+
{
2385+
pjsip_transaction *tsx = (pjsip_transaction*) token;
2386+
pj_status_t status;
2387+
2388+
/* Check if the transaction layer has been shutdown. */
2389+
if (mod_tsx_layer.mod.id < 0)
2390+
return;
2391+
2392+
/* In other circumstances, locking tsx->grp_lock AFTER transport mutex
2393+
* will introduce deadlock if another thread is currently sending a
2394+
* SIP message to the transport. But this should be safe as there should
2395+
* be no way this callback could be called while another thread is
2396+
* sending a message.
2397+
*/
2398+
// Deadlock does happen, see #4453.
2399+
// So now, to avoid deadlock, we'll try to acquire the group lock first,
2400+
// and if it fails, we'll schedule the processing via timer.
2401+
status = pj_grp_lock_tryacquire(tsx->grp_lock);
2402+
if (status != PJ_SUCCESS) {
2403+
pj_time_val delay = { 0, 0 };
2404+
struct tp_cb_param *param = NULL;
2405+
2406+
lock_timer(tsx);
2407+
tsx_cancel_timer(tsx, &tsx->misc_timer);
2408+
2409+
/* Increment tdata ref count to avoid premature destruction.
2410+
* Note that tsx ref count is already handled by tsx_schedule_timer().
2411+
*/
2412+
pjsip_tx_data_add_ref(tdata);
2413+
2414+
param = PJ_POOL_ZALLOC_T(tsx->pool, struct tp_cb_param);
2415+
param->sent = sent;
2416+
param->tdata = tdata;
2417+
param->tsx = tsx;
2418+
pj_timer_entry_init(&tsx->misc_timer, TIMER_INACTIVE, param,
2419+
&tsx_misc_timer_callback);
2420+
tsx_schedule_timer(tsx, &tsx->misc_timer, &delay, TRANSPORT_CB_TIMER);
2421+
unlock_timer(tsx);
2422+
return;
2423+
}
2424+
2425+
transport_callback_impl(tsx, tdata, sent);
2426+
pj_grp_lock_release(tsx->grp_lock);
2427+
}
2428+
2429+
23572430
/*
23582431
* Callback when transport state changes.
23592432
*/

0 commit comments

Comments
 (0)