
Commit 1a633bd

Bob Pearson authored and jgunthorpe committed
RDMA/rxe: Let destroy qp succeed with stuck packet
In some situations a sent packet may get queued in the NIC longer than the timeout of a ULP. Currently, if this happens, the ULP may try to reset the link by destroying the qp and setting up an alternate connection, but this will fail because the rxe driver is waiting for the packet to finish being sent and to be returned to the skb destructor function, where the qp reference holding things up would be dropped. This patch changes how the qp is passed to the destructor: the qp index is stored rather than a qp pointer. The destructor then attempts to look up the qp from its index and exits early if the lookup fails. This requires taking a reference on the struct sock rather than the qp, allowing the qp to be destroyed while the sk is still around waiting for the packet to finish.

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Bob Pearson <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 9cc6290 commit 1a633bd
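To make the pointer-stashing trick concrete, below is a minimal, standalone sketch of the pattern the patch relies on: a small integer index is encoded into a pointer-sized user-data slot (as the diff does with qp->elem.index and sk_user_data) and decoded again later, with 0 read back as "nothing stored", which the destructor checks for. The names here (fake_sock, stash_index, fetch_index) are illustrative only and do not exist in the driver.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for a socket's sk_user_data slot. */
struct fake_sock {
	void *user_data;
};

/* Encode: store the index directly in the pointer-sized slot.
 * A zero value is later treated as "no index stored". */
static void stash_index(struct fake_sock *sk, unsigned int index)
{
	sk->user_data = (void *)(uintptr_t)index;
}

/* Decode: recover the index; 0 means nothing was stored. */
static unsigned int fetch_index(const struct fake_sock *sk)
{
	return (unsigned int)(uintptr_t)sk->user_data;
}

int main(void)
{
	struct fake_sock sk = { .user_data = NULL };

	stash_index(&sk, 42);
	printf("stored index: %u\n", fetch_index(&sk));	/* prints 42 */
	return 0;
}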

File tree: 2 files changed (+32, -12 lines)


drivers/infiniband/sw/rxe/rxe_net.c

Lines changed: 31 additions & 11 deletions
@@ -345,25 +345,44 @@ int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
 
 static void rxe_skb_tx_dtor(struct sk_buff *skb)
 {
-	struct sock *sk = skb->sk;
-	struct rxe_qp *qp = sk->sk_user_data;
-	int skb_out = atomic_dec_return(&qp->skb_out);
+	struct net_device *ndev = skb->dev;
+	struct rxe_dev *rxe;
+	unsigned int qp_index;
+	struct rxe_qp *qp;
+	int skb_out;
+
+	rxe = rxe_get_dev_from_net(ndev);
+	if (!rxe && is_vlan_dev(ndev))
+		rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
+	if (WARN_ON(!rxe))
+		return;
 
-	if (unlikely(qp->need_req_skb &&
-		     skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
+	qp_index = (int)(uintptr_t)skb->sk->sk_user_data;
+	if (!qp_index)
+		return;
+
+	qp = rxe_pool_get_index(&rxe->qp_pool, qp_index);
+	if (!qp)
+		goto put_dev;
+
+	skb_out = atomic_dec_return(&qp->skb_out);
+	if (qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)
 		rxe_sched_task(&qp->send_task);
 
 	rxe_put(qp);
+put_dev:
+	ib_device_put(&rxe->ib_dev);
+	sock_put(skb->sk);
 }
 
 static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 {
 	int err;
+	struct sock *sk = pkt->qp->sk->sk;
 
+	sock_hold(sk);
+	skb->sk = sk;
 	skb->destructor = rxe_skb_tx_dtor;
-	skb->sk = pkt->qp->sk->sk;
-
-	rxe_get(pkt->qp);
 	atomic_inc(&pkt->qp->skb_out);
 
 	if (skb->protocol == htons(ETH_P_IP))
@@ -379,12 +398,13 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
  */
 static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 {
+	struct sock *sk = pkt->qp->sk->sk;
+
 	memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
 
+	sock_hold(sk);
+	skb->sk = sk;
 	skb->destructor = rxe_skb_tx_dtor;
-	skb->sk = pkt->qp->sk->sk;
-
-	rxe_get(pkt->qp);
 	atomic_inc(&pkt->qp->skb_out);
 
 	if (skb->protocol == htons(ETH_P_IP))
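
For readability, here is the destructor as it stands after this change, assembled from the added and kept lines of the hunk above; the comments are descriptive additions and are not part of the patch itself.

static void rxe_skb_tx_dtor(struct sk_buff *skb)
{
	struct net_device *ndev = skb->dev;
	struct rxe_dev *rxe;
	unsigned int qp_index;
	struct rxe_qp *qp;
	int skb_out;

	/* Resolve the rxe device from the netdev, falling back to the
	 * real device when the skb went out over a vlan. */
	rxe = rxe_get_dev_from_net(ndev);
	if (!rxe && is_vlan_dev(ndev))
		rxe = rxe_get_dev_from_net(vlan_dev_real_dev(ndev));
	if (WARN_ON(!rxe))
		return;

	/* Recover the qp index stashed in sk_user_data; 0 means none. */
	qp_index = (int)(uintptr_t)skb->sk->sk_user_data;
	if (!qp_index)
		return;

	/* The qp may already have been destroyed; exit early if so. */
	qp = rxe_pool_get_index(&rxe->qp_pool, qp_index);
	if (!qp)
		goto put_dev;

	skb_out = atomic_dec_return(&qp->skb_out);
	if (qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)
		rxe_sched_task(&qp->send_task);

	rxe_put(qp);
put_dev:
	ib_device_put(&rxe->ib_dev);
	sock_put(skb->sk);
}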

drivers/infiniband/sw/rxe/rxe_qp.c

Lines changed: 1 addition & 1 deletion
@@ -244,7 +244,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 	err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
 	if (err < 0)
 		return err;
-	qp->sk->sk->sk_user_data = qp;
+	qp->sk->sk->sk_user_data = (void *)(uintptr_t)qp->elem.index;
 
 	/* pick a source UDP port number for this QP based on
 	 * the source QPN. this spreads traffic for different QPs
