
Commit 993138a

mfijalko authored and Kernel Patches Daemon committed
xsk: fix immature cq descriptor production
Eryk reported an issue, referenced under the Closes: tag, related to umem addrs being prematurely produced onto the pool's completion queue.

Let us make the skb's destructor responsible for producing all addrs that a given skb used. Introduce struct xsk_addrs, which carries the descriptor count along with an array of addresses taken from the processed descriptors. It is passed via skb_shared_info::destructor_arg so that xsk_destruct_skb() can refer to it.

To mitigate the overhead of the additional memory allocations, introduce a kmem_cache of xsk_addrs, but be smart about scalability, as assigning a unique cache to each socket might be expensive. Store the kmem_caches as percpu variables with embedded refcounting and let the xsk code index them by queue id. If NIC#0 already runs a copy-mode xsk socket on queue #10 and the user starts a socket on the <NIC#1, qid#10> tuple, the kmem_cache is reused. Keep a pointer to the kmem_cache in xdp_sock to avoid accessing bss in the data path.

The commit from the Fixes: tag introduced the buggy behavior; it was not broken from day 1, but rather when xsk multi-buffer support got introduced.

Fixes: b7f72a3 ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path")
Reported-by: Eryk Kubanski <[email protected]>
Closes: https://lore.kernel.org/netdev/[email protected]/
Acked-by: Magnus Karlsson <[email protected]>
Signed-off-by: Maciej Fijalkowski <[email protected]>
1 parent cd1580c commit 993138a
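For orientation before the diff: a condensed, hypothetical sketch of the flow this patch establishes. tx_record_desc() is illustrative only and does not exist in the tree; the real logic lives in xsk_build_skb() and xsk_destruct_skb() below.

/* Sketch of the fix: record umem addrs per skb instead of producing
 * them onto the completion queue at xmit time.
 *
 * Before: desc->addr was written to the CQ as soon as the descriptor
 * was consumed, and destructor_arg carried only a count, so userspace
 * could reuse an addr while the skb still referenced it.
 */
struct xsk_addrs {
	u32 num_descs;			/* descriptors backing this skb */
	u64 addrs[MAX_SKB_FRAGS + 1];	/* their umem addresses */
};

/* Hypothetical helper: called once per consumed Tx descriptor. */
static void tx_record_desc(struct sk_buff *skb, struct xdp_desc *desc)
{
	/* one kmem_cache-backed xsk_addrs rides in destructor_arg */
	struct xsk_addrs *a = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;

	a->addrs[a->num_descs++] = desc->addr;	/* record only, no CQ write */
}

/* CQ slots are still reserved at xmit time (xsk_cq_reserve_locked()),
 * but the addrs are written and submitted only in the skb destructor
 * (xsk_cq_submit_addr_locked()), i.e. once the skb is truly done. */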

File tree

3 files changed: +135 −18 lines

include/net/xdp_sock.h

Lines changed: 1 addition & 0 deletions
@@ -61,6 +61,7 @@ struct xdp_sock {
 		XSK_BOUND,
 		XSK_UNBOUND,
 	} state;
+	struct kmem_cache *generic_cache;
 
 	struct xsk_queue *tx ____cacheline_aligned_in_smp;
 	struct list_head tx_list;

net/xdp/xsk.c

Lines changed: 122 additions & 18 deletions
@@ -36,6 +36,18 @@
 #define TX_BATCH_SIZE 32
 #define MAX_PER_SOCKET_BUDGET 32
 
+struct xsk_addrs {
+	u32 num_descs;
+	u64 addrs[MAX_SKB_FRAGS + 1];
+};
+
+struct xsk_generic_cache {
+	struct kmem_cache *cache;
+	refcount_t users;
+};
+
+DEFINE_PER_CPU(struct xsk_generic_cache, system_xsk_generic_cache);
+
 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -532,25 +544,39 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
 	return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
 }
 
-static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
+static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool)
 {
 	unsigned long flags;
 	int ret;
 
 	spin_lock_irqsave(&pool->cq_lock, flags);
-	ret = xskq_prod_reserve_addr(pool->cq, addr);
+	ret = xskq_prod_reserve(pool->cq);
 	spin_unlock_irqrestore(&pool->cq_lock, flags);
 
 	return ret;
 }
 
-static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
+static void xsk_cq_submit_addr_locked(struct xdp_sock *xs,
+				      struct sk_buff *skb)
 {
+	struct xsk_buff_pool *pool = xs->pool;
+	struct xsk_addrs *xsk_addrs;
 	unsigned long flags;
+	u32 num_desc, i;
+	u32 idx;
+
+	xsk_addrs = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+	num_desc = xsk_addrs->num_descs;
 
 	spin_lock_irqsave(&pool->cq_lock, flags);
-	xskq_prod_submit_n(pool->cq, n);
+	idx = xskq_get_prod(pool->cq);
+
+	for (i = 0; i < num_desc; i++)
+		xskq_prod_write_addr(pool->cq, idx + i, xsk_addrs->addrs[i]);
+	xskq_prod_submit_n(pool->cq, num_desc);
+
 	spin_unlock_irqrestore(&pool->cq_lock, flags);
+	kmem_cache_free(xs->generic_cache, xsk_addrs);
 }
 
 static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
@@ -562,11 +588,6 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
 	spin_unlock_irqrestore(&pool->cq_lock, flags);
 }
 
-static u32 xsk_get_num_desc(struct sk_buff *skb)
-{
-	return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
-}
-
 static void xsk_destruct_skb(struct sk_buff *skb)
 {
 	struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta;
@@ -576,21 +597,37 @@
 		*compl->tx_timestamp = ktime_get_tai_fast_ns();
 	}
 
-	xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
+	xsk_cq_submit_addr_locked(xdp_sk(skb->sk), skb);
 	sock_wfree(skb);
 }
 
-static void xsk_set_destructor_arg(struct sk_buff *skb)
+static u32 xsk_get_num_desc(struct sk_buff *skb)
 {
-	long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
+	struct xsk_addrs *addrs;
+
+	addrs = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+	return addrs->num_descs;
+}
 
-	skb_shinfo(skb)->destructor_arg = (void *)num;
+static void xsk_set_destructor_arg(struct sk_buff *skb, struct xsk_addrs *addrs)
+{
+	skb_shinfo(skb)->destructor_arg = (void *)addrs;
+}
+
+static void xsk_inc_skb_descs(struct sk_buff *skb)
+{
+	struct xsk_addrs *addrs;
+
+	addrs = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+	addrs->num_descs++;
 }
 
 static void xsk_consume_skb(struct sk_buff *skb)
 {
 	struct xdp_sock *xs = xdp_sk(skb->sk);
 
+	kmem_cache_free(xs->generic_cache,
+			(struct xsk_addrs *)skb_shinfo(skb)->destructor_arg);
 	skb->destructor = sock_wfree;
 	xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb));
 	/* Free skb without triggering the perf drop trace */
@@ -605,10 +642,12 @@ static void xsk_drop_skb(struct sk_buff *skb)
 }
 
 static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
-					      struct xdp_desc *desc)
+					      struct xdp_desc *desc,
+					      struct kmem_cache *cache)
 {
 	struct xsk_buff_pool *pool = xs->pool;
 	u32 hr, len, ts, offset, copy, copied;
+	struct xsk_addrs *addrs = NULL;
 	struct sk_buff *skb = xs->skb;
 	struct page *page;
 	void *buffer;
@@ -623,6 +662,12 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 			return ERR_PTR(err);
 
 		skb_reserve(skb, hr);
+
+		addrs = kmem_cache_zalloc(cache, GFP_KERNEL);
+		if (!addrs)
+			return ERR_PTR(-ENOMEM);
+
+		xsk_set_destructor_arg(skb, addrs);
 	}
 
 	addr = desc->addr;
@@ -662,12 +707,13 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 {
 	struct xsk_tx_metadata *meta = NULL;
 	struct net_device *dev = xs->dev;
+	struct xsk_addrs *addrs = NULL;
 	struct sk_buff *skb = xs->skb;
 	bool first_frag = false;
 	int err;
 
 	if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
-		skb = xsk_build_skb_zerocopy(xs, desc);
+		skb = xsk_build_skb_zerocopy(xs, desc, xs->generic_cache);
 		if (IS_ERR(skb)) {
 			err = PTR_ERR(skb);
 			goto free_err;
@@ -694,6 +740,15 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 			err = skb_store_bits(skb, 0, buffer, len);
 			if (unlikely(err))
 				goto free_err;
+
+			addrs = kmem_cache_zalloc(xs->generic_cache, GFP_KERNEL);
+			if (!addrs) {
+				err = -ENOMEM;
+				goto free_err;
+			}
+
+			xsk_set_destructor_arg(skb, addrs);
+
 		} else {
 			int nr_frags = skb_shinfo(skb)->nr_frags;
 			struct page *page;
@@ -759,7 +814,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	skb->mark = READ_ONCE(xs->sk.sk_mark);
 	skb->destructor = xsk_destruct_skb;
 	xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
-	xsk_set_destructor_arg(skb);
+
+	addrs = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg;
+	addrs->addrs[addrs->num_descs++] = desc->addr;
 
 	return skb;
 
@@ -769,7 +826,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 
 	if (err == -EOVERFLOW) {
 		/* Drop the packet */
-		xsk_set_destructor_arg(xs->skb);
+		xsk_inc_skb_descs(xs->skb);
 		xsk_drop_skb(xs->skb);
 		xskq_cons_release(xs->tx);
 	} else {
@@ -812,7 +869,7 @@ static int __xsk_generic_xmit(struct sock *sk)
 		 * if there is space in it. This avoids having to implement
 		 * any buffering in the Tx path.
 		 */
-		err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr);
+		err = xsk_cq_reserve_locked(xs->pool);
 		if (err) {
 			err = -EAGAIN;
 			goto out;
@@ -1095,6 +1152,7 @@ static void xsk_delete_from_maps(struct xdp_sock *xs)
 
 static int xsk_release(struct socket *sock)
 {
+	struct xsk_generic_cache *pcpu_cache;
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
 	struct net *net;
@@ -1123,6 +1181,15 @@ static int xsk_release(struct socket *sock)
 	xskq_destroy(xs->fq_tmp);
 	xskq_destroy(xs->cq_tmp);
 
+	pcpu_cache = per_cpu_ptr(&system_xsk_generic_cache, xs->queue_id);
+	if (pcpu_cache->cache) {
+		if (refcount_dec_and_test(&pcpu_cache->users)) {
+			kmem_cache_destroy(pcpu_cache->cache);
+			pcpu_cache->cache = NULL;
+			xs->generic_cache = NULL;
+		}
+	}
+
 	sock_orphan(sk);
 	sock->sk = NULL;
 
@@ -1153,6 +1220,33 @@ static bool xsk_validate_queues(struct xdp_sock *xs)
 	return xs->fq_tmp && xs->cq_tmp;
}
 
+static int xsk_alloc_generic_xmit_cache(struct xdp_sock *xs, u16 qid)
+{
+	struct xsk_generic_cache *pcpu_cache =
+		per_cpu_ptr(&system_xsk_generic_cache, qid);
+	struct kmem_cache *cache;
+	char cache_name[32];
+
+	if (refcount_read(&pcpu_cache->users) > 0) {
+		refcount_inc(&pcpu_cache->users);
+		xs->generic_cache = pcpu_cache->cache;
+		return 0;
+	}
+
+	snprintf(cache_name, sizeof(cache_name),
+		 "xsk_generic_xmit_cache%d", qid);
+	cache = kmem_cache_create(cache_name, sizeof(struct xsk_addrs), 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	if (!cache)
+		return -ENOMEM;
+
+	refcount_set(&pcpu_cache->users, 1);
+	pcpu_cache->cache = cache;
+	xs->generic_cache = pcpu_cache->cache;
+
+	return 0;
+}
+
 static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -1306,6 +1400,16 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	xs->zc = xs->umem->zc;
 	xs->sg = !!(xs->umem->flags & XDP_UMEM_SG_FLAG);
 	xs->queue_id = qid;
+
+	if (!xs->zc) {
+		err = xsk_alloc_generic_xmit_cache(xs, qid);
+		if (err) {
+			xp_destroy(xs->pool);
+			xs->pool = NULL;
+			goto out_unlock;
+		}
+	}
+
 	xp_add_xsk(xs->pool, xs);
 
 	if (qid < dev->real_num_rx_queues) {
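The cache sharing set up by xsk_alloc_generic_xmit_cache() and torn down in xsk_release() above is plain refcounting keyed by queue id. Below is a hypothetical userspace model of that lifetime (a plain int instead of refcount_t, concurrency ignored), for two copy-mode sockets that both bind queue id 10:

#include <assert.h>
#include <stddef.h>

struct cache_slot { int users; void *cache; };	/* models one percpu slot */
static struct cache_slot slot10;		/* the slot indexed by qid 10 */

static void *cache_acquire(struct cache_slot *s)
{
	if (s->users > 0) {			/* a later socket: reuse */
		s->users++;
		return s->cache;
	}
	s->cache = &slot10;			/* stands in for kmem_cache_create() */
	s->users = 1;				/* first user creates the cache */
	return s->cache;
}

static void cache_release(struct cache_slot *s)
{
	if (s->cache && --s->users == 0)
		s->cache = NULL;		/* stands in for kmem_cache_destroy() */
}

int main(void)
{
	void *a = cache_acquire(&slot10);	/* <NIC#0, qid 10>: users 0 -> 1 */
	void *b = cache_acquire(&slot10);	/* <NIC#1, qid 10>: users 1 -> 2, reused */
	assert(a == b);
	cache_release(&slot10);			/* users 2 -> 1, cache stays */
	cache_release(&slot10);			/* users 1 -> 0, cache destroyed */
	assert(slot10.cache == NULL);
	return 0;
}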

net/xdp/xsk_queue.h

Lines changed: 12 additions & 0 deletions
@@ -344,6 +344,11 @@ static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
 
 /* Functions for producers */
 
+static inline u32 xskq_get_prod(struct xsk_queue *q)
+{
+	return READ_ONCE(q->ring->producer);
+}
+
 static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max)
 {
 	u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
@@ -390,6 +395,13 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
 	return 0;
 }
 
+static inline void xskq_prod_write_addr(struct xsk_queue *q, u32 idx, u64 addr)
+{
+	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+	ring->desc[idx & q->ring_mask] = addr;
+}
+
 static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
 					      u32 nb_entries)
 {
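The new xskq_prod_write_addr() relies on the ring size being a power of two: masking with q->ring_mask wraps a monotonically growing producer index onto a ring slot. A small standalone illustration of the wraparound, assuming an 8-entry ring (ring_mask == 7):

#include <stdio.h>

int main(void)
{
	unsigned int ring_mask = 8 - 1;	/* nentries = 8, a power of two */
	unsigned int idx = 6;		/* raw index as from xskq_get_prod() */

	/* the destructor writes num_desc addrs at idx, idx + 1, ... */
	for (unsigned int i = 0; i < 4; i++)
		printf("idx %u -> slot %u\n", idx + i, (idx + i) & ring_mask);
	/* prints slots 6, 7, 0, 1: the index wraps, it is never reset */
	return 0;
}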
