@@ -140,6 +140,7 @@ static int max_retrans_count = -1;
 #define TRANSPORT_ERR_TIMER 3
 #define TRANSPORT_DISC_TIMER 4
 #define TERMINATE_TIMER 5
+#define TRANSPORT_CB_TIMER 6
 
 /* Flags for tsx_set_state() */
 enum
@@ -2265,23 +2266,21 @@ static void send_msg_callback( pjsip_send_state *send_state,
 }
 
 
-/* Transport callback. */
-static void transport_callback(void *token, pjsip_tx_data *tdata,
-                               pj_ssize_t sent)
-{
-    pjsip_transaction *tsx = (pjsip_transaction*) token;
+/* Transport callback parameter. */
+struct tp_cb_param {
+    pjsip_transaction *tsx;
+    pjsip_tx_data *tdata;
+    pj_ssize_t sent;
+};
 
-    /* Check if the transaction layer has been shutdown. */
-    if (mod_tsx_layer.mod.id < 0)
-        return;
 
-    /* In other circumstances, locking tsx->grp_lock AFTER transport mutex
-     * will introduce deadlock if another thread is currently sending a
-     * SIP message to the transport. But this should be safe as there should
-     * be no way this callback could be called while another thread is
-     * sending a message.
-     */
+/* Transport callback actual implementation. */
+static void transport_callback_impl(pjsip_transaction *tsx,
+                                    pjsip_tx_data *tdata,
+                                    pj_ssize_t sent)
+{
     pj_grp_lock_acquire(tsx->grp_lock);
+
     tsx->transport_flag &= ~(TSX_HAS_PENDING_TRANSPORT);
 
     if (sent > 0 || tsx->role == PJSIP_ROLE_UAS) {
@@ -2299,6 +2298,7 @@ static void transport_callback(void *token, pjsip_tx_data *tdata,
         tsx_set_state( tsx, PJSIP_TSX_STATE_DESTROYED,
                        PJSIP_EVENT_UNKNOWN, NULL, 0 );
         pj_grp_lock_release(tsx->grp_lock);
+        pj_grp_lock_dec_ref(tsx->grp_lock);
         return;
     }
 
@@ -2354,6 +2354,79 @@ static void transport_callback(void *token, pjsip_tx_data *tdata,
 }
 
 
+/* Timer callback for transport callback.
+ * This is currently only used to avoid deadlock due to inverted locking order
+ * between transport and transaction.
+ */
+static void tsx_misc_timer_callback(pj_timer_heap_t *theap,
+                                    pj_timer_entry *entry)
+{
+    PJ_UNUSED_ARG(theap);
+
+    if (entry->id == TRANSPORT_CB_TIMER) {
+        struct tp_cb_param *param = (struct tp_cb_param*)entry->user_data;
+
+        /* Check if the transaction layer has been shutdown. */
+        if (mod_tsx_layer.mod.id >= 0) {
+            /* Call transport callback implementation */
+            transport_callback_impl(param->tsx, param->tdata, param->sent);
+        }
+
+        /* Release tdata */
+        pjsip_tx_data_dec_ref(param->tdata);
+    }
+}
+
+
+/* Transport callback. */
+static void transport_callback(void *token, pjsip_tx_data *tdata,
+                               pj_ssize_t sent)
+{
+    pjsip_transaction *tsx = (pjsip_transaction*) token;
+    pj_status_t status;
+
+    /* Check if the transaction layer has been shutdown. */
+    if (mod_tsx_layer.mod.id < 0)
+        return;
+
+    /* In other circumstances, locking tsx->grp_lock AFTER transport mutex
+     * will introduce deadlock if another thread is currently sending a
+     * SIP message to the transport. But this should be safe as there should
+     * be no way this callback could be called while another thread is
+     * sending a message.
+     */
+    // Deadlock does happen, see #4453.
+    // So now, to avoid deadlock, we'll try to acquire the group lock first,
+    // and if it fails, we'll schedule the processing via timer.
+    status = pj_grp_lock_tryacquire(tsx->grp_lock);
+    if (status != PJ_SUCCESS) {
+        pj_time_val delay = {0, 0};
+        struct tp_cb_param *param = NULL;
+
+        lock_timer(tsx);
+        tsx_cancel_timer(tsx, &tsx->misc_timer);
+
+        /* Increment tdata ref count to avoid premature destruction.
+         * Note that tsx ref count is already handled by tsx_schedule_timer().
+         */
+        pjsip_tx_data_add_ref(tdata);
+
+        param = PJ_POOL_ZALLOC_T(tsx->pool, struct tp_cb_param);
+        param->sent = sent;
+        param->tdata = tdata;
+        param->tsx = tsx;
+        pj_timer_entry_init(&tsx->misc_timer, TIMER_INACTIVE, param,
+                            &tsx_misc_timer_callback);
+        tsx_schedule_timer(tsx, &tsx->misc_timer, &delay, TRANSPORT_CB_TIMER);
+        unlock_timer(tsx);
+        return;
+    }
+
+    transport_callback_impl(tsx, tdata, sent);
+    pj_grp_lock_release(tsx->grp_lock);
+}
+
+
 /*
  * Callback when transport state changes.
  */
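
For context, the diff above follows a common try-lock-or-defer pattern: the transport callback attempts a non-blocking acquire of the transaction's group lock and, when the lock is busy (blocking there would risk deadlock because the transport lock is already held), it captures its arguments in tp_cb_param and reschedules the work through a zero-delay TRANSPORT_CB_TIMER entry. The sketch below is a minimal, generic illustration of that pattern using plain pthreads; it is not pjlib code, and names such as deferred_job, on_send_complete and poll_deferred are hypothetical stand-ins for the group lock, the timer lock and the misc timer.

/* Minimal sketch (not pjlib code) of the try-lock-or-defer pattern used by
 * transport_callback() above: if the main lock cannot be taken without
 * risking inverted lock order, capture the arguments and let another
 * context finish the job later.  Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

/* Captured arguments for deferred processing (role of struct tp_cb_param). */
struct deferred_job {
    int  valid;
    char payload[64];
    long sent;
};

static pthread_mutex_t obj_lock     = PTHREAD_MUTEX_INITIALIZER; /* ~ grp_lock   */
static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ lock_timer */
static struct deferred_job pending;  /* stands in for the zero-delay timer entry */

/* The real work; caller must hold obj_lock. */
static void handle_send_result(const char *payload, long sent)
{
    printf("processed '%s', sent=%ld\n", payload, sent);
}

/* Callback that may fire while another thread already holds obj_lock. */
static void on_send_complete(const char *payload, long sent)
{
    if (pthread_mutex_trylock(&obj_lock) != 0) {
        /* obj_lock is busy: defer instead of blocking (deadlock risk). */
        pthread_mutex_lock(&pending_lock);
        strncpy(pending.payload, payload, sizeof(pending.payload) - 1);
        pending.sent  = sent;
        pending.valid = 1;
        pthread_mutex_unlock(&pending_lock);
        return;
    }
    handle_send_result(payload, sent);
    pthread_mutex_unlock(&obj_lock);
}

/* Run later from a timer/poll thread, with no other lock held. */
static void poll_deferred(void)
{
    struct deferred_job job;

    pthread_mutex_lock(&pending_lock);
    job = pending;
    pending.valid = 0;
    pthread_mutex_unlock(&pending_lock);

    if (job.valid) {
        pthread_mutex_lock(&obj_lock);
        handle_send_result(job.payload, job.sent);
        pthread_mutex_unlock(&obj_lock);
    }
}

int main(void)
{
    on_send_complete("INVITE", 512);  /* lock is free: handled immediately */
    poll_deferred();                  /* nothing pending: no-op            */
    return 0;
}

The actual patch differs in that the deferred job is driven by PJSIP's timer heap rather than a polled slot, and the tx data is kept alive across the deferral with an extra reference (pjsip_tx_data_add_ref / pjsip_tx_data_dec_ref) until the timer callback has run.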