Skip to content

Commit 1acba6a

Browse files
Valentine Fatievjgunthorpe
authored andcommitted
IB/ipoib: Fix double free of skb in case of multicast traffic in CM mode
When connected mode is set, and we have connected and datagram traffic in parallel, ipoib might crash with double free of datagram skb. The current mechanism assumes that the order in the completion queue is the same as the order of sent packets for all QPs. Order is kept only for specific QP, in case of mixed UD and CM traffic we have few QPs (one UD and few CM's) in parallel. The problem: ---------------------------------------------------------- Transmit queue: ----------------- UD skb pointer kept in queue itself, CM skb kept in spearate queue and uses transmit queue as a placeholder to count the number of total transmitted packets. 0 1 2 3 4 5 6 7 8 9 10 11 12 13 .........127 ------------------------------------------------------------ NL ud1 UD2 CM1 ud3 cm2 cm3 ud4 cm4 ud5 NL NL NL ........... ------------------------------------------------------------ ^ ^ tail head Completion queue (problematic scenario) - the order not the same as in the transmit queue: 1 2 3 4 5 6 7 8 9 ------------------------------------ ud1 CM1 UD2 ud3 cm2 cm3 ud4 cm4 ud5 ------------------------------------ 1. CM1 'wc' processing - skb freed in cm separate ring. - tx_tail of transmit queue increased although UD2 is not freed. Now driver assumes UD2 index is already freed and it could be used for new transmitted skb. 0 1 2 3 4 5 6 7 8 9 10 11 12 13 .........127 ------------------------------------------------------------ NL NL UD2 CM1 ud3 cm2 cm3 ud4 cm4 ud5 NL NL NL ........... ------------------------------------------------------------ ^ ^ ^ (Bad)tail head (Bad - Could be used for new SKB) In this case (due to heavy load) UD2 skb pointer could be replaced by new transmitted packet UD_NEW, as the driver assumes its free. At this point we will have to process two 'wc' with same index but we have only one pointer to free. During second attempt to free the same skb we will have NULL pointer exception. 2. UD2 'wc' processing - skb freed according the index we got from 'wc', but it was already overwritten by mistake. So actually the skb that was released is the skb of the new transmitted packet and not the original one. 3. UD_NEW 'wc' processing - attempt to free already freed skb. NUll pointer exception. The fix: ----------------------------------------------------------------------- The fix is to stop using the UD ring as a placeholder for CM packets, the cyclic ring variables tx_head and tx_tail will manage the UD tx_ring, a new cyclic variables global_tx_head and global_tx_tail are introduced for managing and counting the overall outstanding sent packets, then the send queue will be stopped and waken based on these variables only. Note that no locking is needed since global_tx_head is updated in the xmit flow and global_tx_tail is updated in the NAPI flow only. A previous attempt tried to use one variable to count the outstanding sent packets, but it did not work since xmit and NAPI flows can run at the same time and the counter will be updated wrongly. Thus, we use the same simple cyclic head and tail scheme that we have today for the UD tx_ring. Fixes: 2c104ea ("IB/ipoib: Get rid of the tx_outstanding variable in all modes") Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Valentine Fatiev <[email protected]> Signed-off-by: Alaa Hleihel <[email protected]> Signed-off-by: Leon Romanovsky <[email protected]> Acked-by: Doug Ledford <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent c85f4ab commit 1acba6a

File tree

4 files changed

+26
-12
lines changed

4 files changed

+26
-12
lines changed

drivers/infiniband/ulp/ipoib/ipoib.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,8 +377,12 @@ struct ipoib_dev_priv {
377377
struct ipoib_rx_buf *rx_ring;
378378

379379
struct ipoib_tx_buf *tx_ring;
380+
/* cyclic ring variables for managing tx_ring, for UD only */
380381
unsigned int tx_head;
381382
unsigned int tx_tail;
383+
/* cyclic ring variables for counting overall outstanding send WRs */
384+
unsigned int global_tx_head;
385+
unsigned int global_tx_tail;
382386
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
383387
struct ib_ud_wr tx_wr;
384388
struct ib_wc send_wc[MAX_SEND_CQE];

drivers/infiniband/ulp/ipoib/ipoib_cm.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
756756
return;
757757
}
758758

759-
if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) {
759+
if ((priv->global_tx_head - priv->global_tx_tail) ==
760+
ipoib_sendq_size - 1) {
760761
ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
761762
tx->qp->qp_num);
762763
netif_stop_queue(dev);
@@ -786,7 +787,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
786787
} else {
787788
netif_trans_update(dev);
788789
++tx->tx_head;
789-
++priv->tx_head;
790+
++priv->global_tx_head;
790791
}
791792
}
792793

@@ -820,10 +821,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
820821
netif_tx_lock(dev);
821822

822823
++tx->tx_tail;
823-
++priv->tx_tail;
824+
++priv->global_tx_tail;
824825

825826
if (unlikely(netif_queue_stopped(dev) &&
826-
(priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 &&
827+
((priv->global_tx_head - priv->global_tx_tail) <=
828+
ipoib_sendq_size >> 1) &&
827829
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
828830
netif_wake_queue(dev);
829831

@@ -1232,8 +1234,9 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
12321234
dev_kfree_skb_any(tx_req->skb);
12331235
netif_tx_lock_bh(p->dev);
12341236
++p->tx_tail;
1235-
++priv->tx_tail;
1236-
if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) &&
1237+
++priv->global_tx_tail;
1238+
if (unlikely((priv->global_tx_head - priv->global_tx_tail) <=
1239+
ipoib_sendq_size >> 1) &&
12371240
netif_queue_stopped(p->dev) &&
12381241
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
12391242
netif_wake_queue(p->dev);

drivers/infiniband/ulp/ipoib/ipoib_ib.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -407,9 +407,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
407407
dev_kfree_skb_any(tx_req->skb);
408408

409409
++priv->tx_tail;
410+
++priv->global_tx_tail;
410411

411412
if (unlikely(netif_queue_stopped(dev) &&
412-
((priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1) &&
413+
((priv->global_tx_head - priv->global_tx_tail) <=
414+
ipoib_sendq_size >> 1) &&
413415
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
414416
netif_wake_queue(dev);
415417

@@ -634,7 +636,8 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
634636
else
635637
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
636638
/* increase the tx_head after send success, but use it for queue state */
637-
if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) {
639+
if ((priv->global_tx_head - priv->global_tx_tail) ==
640+
ipoib_sendq_size - 1) {
638641
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
639642
netif_stop_queue(dev);
640643
}
@@ -662,6 +665,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
662665

663666
rc = priv->tx_head;
664667
++priv->tx_head;
668+
++priv->global_tx_head;
665669
}
666670
return rc;
667671
}
@@ -807,6 +811,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
807811
ipoib_dma_unmap_tx(priv, tx_req);
808812
dev_kfree_skb_any(tx_req->skb);
809813
++priv->tx_tail;
814+
++priv->global_tx_tail;
810815
}
811816

812817
for (i = 0; i < ipoib_recvq_size; ++i) {

drivers/infiniband/ulp/ipoib/ipoib_main.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,9 +1184,11 @@ static void ipoib_timeout(struct net_device *dev, unsigned int txqueue)
11841184

11851185
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
11861186
jiffies_to_msecs(jiffies - dev_trans_start(dev)));
1187-
ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
1188-
netif_queue_stopped(dev),
1189-
priv->tx_head, priv->tx_tail);
1187+
ipoib_warn(priv,
1188+
"queue stopped %d, tx_head %u, tx_tail %u, global_tx_head %u, global_tx_tail %u\n",
1189+
netif_queue_stopped(dev), priv->tx_head, priv->tx_tail,
1190+
priv->global_tx_head, priv->global_tx_tail);
1191+
11901192
/* XXX reset QP, etc. */
11911193
}
11921194

@@ -1701,7 +1703,7 @@ static int ipoib_dev_init_default(struct net_device *dev)
17011703
goto out_rx_ring_cleanup;
17021704
}
17031705

1704-
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
1706+
/* priv->tx_head, tx_tail and global_tx_tail/head are already 0 */
17051707

17061708
if (ipoib_transport_dev_init(dev, priv->ca)) {
17071709
pr_warn("%s: ipoib_transport_dev_init failed\n",

0 commit comments

Comments
 (0)