net/xdp/xsk.c (98 additions & 14 deletions)
@@ -36,6 +36,20 @@
#define TX_BATCH_SIZE 32
#define MAX_PER_SOCKET_BUDGET 32

+ struct xsk_addr_node {
+ u64 addr;
+ struct list_head addr_node;
+ };
+
+ struct xsk_addr_head {
+ u32 num_descs;
+ struct list_head addrs_list;
+ };
+
+ static struct kmem_cache *xsk_tx_generic_cache;
+
+ #define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb))
+
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
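The new bookkeeping lives in the skb control buffer: XSKCB() overlays
struct xsk_addr_head onto skb->cb, the 48-byte scratch area that the layer
currently owning an skb may use freely. Each generic-xmit skb thus carries its
descriptor count plus a list of the extra UMEM addresses backing its fragments.
A minimal userspace model of the pattern (stand-in types, not the kernel
definitions):

#include <assert.h>

struct list_head { struct list_head *next, *prev; };
struct fake_skb { char cb[48]; };       /* stands in for sk_buff::cb */

struct xsk_addr_head {
	unsigned int num_descs;         /* descriptors backing this skb */
	struct list_head addrs_list;    /* addresses beyond the first one */
};

#define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb))

/* mirrors the BUILD_BUG_ON() added to xsk_set_destructor_arg() below */
static_assert(sizeof(struct xsk_addr_head) <= sizeof(((struct fake_skb *)0)->cb),
	      "xsk_addr_head must fit in skb->cb");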
@@ -532,24 +546,42 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}

- static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
+ static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool)
{
unsigned long flags;
int ret;

spin_lock_irqsave(&pool->cq_lock, flags);
- ret = xskq_prod_reserve_addr(pool->cq, addr);
+ ret = xskq_prod_reserve(pool->cq);
spin_unlock_irqrestore(&pool->cq_lock, flags);

return ret;
}

- static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
+ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
+ struct sk_buff *skb)
{
+ struct xsk_addr_node *pos, *tmp;
+ u32 descs_processed = 0;
unsigned long flags;
+ u32 idx;

spin_lock_irqsave(&pool->cq_lock, flags);
- xskq_prod_submit_n(pool->cq, n);
+ idx = xskq_get_prod(pool->cq);
+
+ xskq_prod_write_addr(pool->cq, idx, (u64)skb_shinfo(skb)->destructor_arg);
+ descs_processed++;
+
+ if (unlikely(XSKCB(skb)->num_descs > 1)) {
+ list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
+ xskq_prod_write_addr(pool->cq, idx + descs_processed,
+ pos->addr);
+ descs_processed++;
+ list_del(&pos->addr_node);
+ kmem_cache_free(xsk_tx_generic_cache, pos);
+ }
+ }
+ xskq_prod_submit_n(pool->cq, descs_processed);
spin_unlock_irqrestore(&pool->cq_lock, flags);
}
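Completion is now resolved at destruction time: the first descriptor's address
travels in skb_shinfo(skb)->destructor_arg, any further addresses sit on the
cb list, and all of them are written into consecutive slots starting at the
published producer index before a single xskq_prod_submit_n() releases them to
userspace. Reservation (xsk_cq_reserve_locked() above) therefore only claims
space; no address is known yet at reserve time. A toy single-producer model of
that reserve/write/publish split (illustrative; the consumer side is omitted):

#include <stdint.h>

#define RING_SIZE 64	/* power of two, like the real completion ring */

struct toy_cq {
	uint64_t desc[RING_SIZE];
	uint32_t producer;	/* entries published to the consumer */
	uint32_t cached_prod;	/* entries reserved but not yet published */
};

static int toy_reserve(struct toy_cq *q)	/* ~xsk_cq_reserve_locked() */
{
	if (q->cached_prod - q->producer == RING_SIZE)
		return -1;			/* ring full */
	q->cached_prod++;
	return 0;
}

static void toy_submit(struct toy_cq *q, const uint64_t *addrs, uint32_t n)
{						/* ~xsk_cq_submit_addr_locked() */
	for (uint32_t i = 0; i < n; i++)
		q->desc[(q->producer + i) & (RING_SIZE - 1)] = addrs[i];
	q->producer += n;	/* the kernel publishes with a release store */
}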

@@ -562,9 +594,14 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
spin_unlock_irqrestore(&pool->cq_lock, flags);
}

+ static void xsk_inc_num_desc(struct sk_buff *skb)
+ {
+ XSKCB(skb)->num_descs++;
+ }
+
static u32 xsk_get_num_desc(struct sk_buff *skb)
{
- return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
+ return XSKCB(skb)->num_descs;
}

static void xsk_destruct_skb(struct sk_buff *skb)
@@ -576,23 +613,33 @@ static void xsk_destruct_skb(struct sk_buff *skb)
*compl->tx_timestamp = ktime_get_tai_fast_ns();
}

- xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
+ xsk_cq_submit_addr_locked(xdp_sk(skb->sk)->pool, skb);
sock_wfree(skb);
}

- static void xsk_set_destructor_arg(struct sk_buff *skb)
+ static void xsk_set_destructor_arg(struct sk_buff *skb, u64 addr)
{
- long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
-
- skb_shinfo(skb)->destructor_arg = (void *)num;
+ BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb));
+ INIT_LIST_HEAD(&XSKCB(skb)->addrs_list);
+ XSKCB(skb)->num_descs = 0;
+ skb_shinfo(skb)->destructor_arg = (void *)addr;
}

static void xsk_consume_skb(struct sk_buff *skb)
{
struct xdp_sock *xs = xdp_sk(skb->sk);
+ u32 num_descs = xsk_get_num_desc(skb);
+ struct xsk_addr_node *pos, *tmp;

+ if (unlikely(num_descs > 1)) {
+ list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
+ list_del(&pos->addr_node);
+ kmem_cache_free(xsk_tx_generic_cache, pos);
+ }
+ }
+
skb->destructor = sock_wfree;
- xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb));
+ xsk_cq_cancel_locked(xs->pool, num_descs);
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
xs->skb = NULL;
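xsk_consume_skb() is the abort path: the packet is dropped before the device
ever sent it, so the reserved completion-queue slots are handed back via
xsk_cq_cancel_locked() and the per-fragment list nodes are freed without
anything being written to the ring. Resetting skb->destructor to sock_wfree
first ensures consume_skb() only drops the wmem accounting and never runs
xsk_destruct_skb(), which would complete descriptors that were never sent.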
@@ -609,6 +656,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
{
struct xsk_buff_pool *pool = xs->pool;
u32 hr, len, ts, offset, copy, copied;
+ struct xsk_addr_node *xsk_addr;
struct sk_buff *skb = xs->skb;
struct page *page;
void *buffer;
@@ -623,6 +671,19 @@
return ERR_PTR(err);

skb_reserve(skb, hr);

+ xsk_set_destructor_arg(skb, desc->addr);
+ } else {
+ xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
+ if (!xsk_addr)
+ return ERR_PTR(-ENOMEM);
+
+ /* In case of the -EOVERFLOW that can happen below,
+ * xsk_consume_skb() will release this node, since the whole
+ * skb is dropped, which implies freeing all list elements.
+ */
+ xsk_addr->addr = desc->addr;
+ list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
}

addr = desc->addr;
@@ -694,8 +755,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err))
goto free_err;

+ xsk_set_destructor_arg(skb, desc->addr);
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
+ struct xsk_addr_node *xsk_addr;
struct page *page;
u8 *vaddr;

@@ -710,12 +774,22 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
goto free_err;
}

+ xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
+ if (!xsk_addr) {
+ __free_page(page);
+ err = -ENOMEM;
+ goto free_err;
+ }
+
vaddr = kmap_local_page(page);
memcpy(vaddr, buffer, len);
kunmap_local(vaddr);

skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);

+ xsk_addr->addr = desc->addr;
+ list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
}

if (first_frag && desc->options & XDP_TX_METADATA) {
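Note the ordering in the fragment path above: the xsk_addr_node is allocated
before the payload is copied and the page is attached via skb_add_rx_frag(),
so the only failure needing manual unwinding is the node allocation itself,
which merely frees the page. The same acquire-in-order, release-in-reverse
idiom in miniature (plain C with hypothetical names, not the kernel code):

#include <stdlib.h>

struct frag_node { unsigned long addr; };

static int attach_frag(unsigned long addr, void **pagep,
		       struct frag_node **nodep)
{
	void *page = malloc(4096);		/* stand-in for alloc_page() */
	struct frag_node *node;

	if (!page)
		return -1;

	node = calloc(1, sizeof(*node));	/* bookkeeping before the copy */
	if (!node)
		goto free_page;	/* unwind only what was already acquired */

	node->addr = addr;
	*pagep = page;		/* caller now owns both resources */
	*nodep = node;
	return 0;

free_page:
	free(page);
	return -1;
}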
@@ -759,7 +833,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
- xsk_set_destructor_arg(skb);
+ xsk_inc_num_desc(skb);

return skb;

@@ -769,7 +843,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,

if (err == -EOVERFLOW) {
/* Drop the packet */
- xsk_set_destructor_arg(xs->skb);
+ xsk_inc_num_desc(xs->skb);
xsk_drop_skb(xs->skb);
xskq_cons_release(xs->tx);
} else {
@@ -812,7 +886,7 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr);
+ err = xsk_cq_reserve_locked(xs->pool);
if (err) {
err = -EAGAIN;
goto out;
@@ -1815,8 +1889,18 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;

+ xsk_tx_generic_cache = kmem_cache_create("xsk_generic_xmit_cache",
+ sizeof(struct xsk_addr_node),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!xsk_tx_generic_cache) {
+ err = -ENOMEM;
+ goto out_unreg_notif;
+ }
+
return 0;

+ out_unreg_notif:
+ unregister_netdevice_notifier(&xsk_netdev_notifier);
out_pernet:
unregister_pernet_subsys(&xsk_net_ops);
out_sk:
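The init path keeps its LIFO unwind intact: the cache is created after the
netdev notifier is registered, so its failure gets a new out_unreg_notif label
that unregisters the notifier before falling through to the older labels. A
dedicated cache, rather than plain kmalloc(), suits xsk_addr_node: the objects
are small and fixed-size, they are allocated and freed at high rate on the
generic-xmit path, and SLAB_HWCACHE_ALIGN keeps them from sharing cache lines.
Any kmem_cache_destroy() pairing falls outside this diff.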
net/xdp/xsk_queue.h (12 additions & 0 deletions)
@@ -344,6 +344,11 @@ static inline u32 xskq_cons_present_entries(struct xsk_queue *q)

/* Functions for producers */

+ static inline u32 xskq_get_prod(struct xsk_queue *q)
+ {
+ return READ_ONCE(q->ring->producer);
+ }
+
static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max)
{
u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
@@ -390,6 +395,13 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
return 0;
}

+ static inline void xskq_prod_write_addr(struct xsk_queue *q, u32 idx, u64 addr)
+ {
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+ ring->desc[idx & q->ring_mask] = addr;
+ }
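xskq_get_prod() reads the published producer index with READ_ONCE(), and
xskq_prod_write_addr() fills a slot that an earlier xskq_prod_reserve()
already claimed, so no capacity check is needed at write time. Masking with
q->ring_mask works because nentries is a power of two and indices are
free-running; a trivial standalone check (illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int nentries = 8, mask = nentries - 1;	/* power of two */

	/* a free-running index wraps cleanly onto the ring */
	for (unsigned int idx = 6; idx < 10; idx++)
		printf("idx %u -> slot %u\n", idx, idx & mask);	/* 6 7 0 1 */
	return 0;
}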

static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
u32 nb_entries)
{