Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1729,6 +1729,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
struct mlx5e_wqe_frag_info *head_wi = wi;
u16 rx_headroom = rq->buff.headroom;
struct mlx5e_frag_page *frag_page;
u8 nr_frags_free, old_nr_frags;
struct skb_shared_info *sinfo;
u32 frag_consumed_bytes;
struct bpf_prog *prog;
Expand Down Expand Up @@ -1772,17 +1773,27 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
wi++;
}

old_nr_frags = sinfo->nr_frags;

prog = rcu_dereference(rq->xdp_prog);
if (prog && mlx5e_xdp_handle(rq, prog, mxbuf)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
struct mlx5e_wqe_frag_info *pwi;

wi -= old_nr_frags - sinfo->nr_frags;

for (pwi = head_wi; pwi < wi; pwi++)
pwi->frag_page->frags++;
}
return NULL; /* page/packet was consumed by XDP */
}

nr_frags_free = old_nr_frags - sinfo->nr_frags;
if (unlikely(nr_frags_free)) {
wi -= nr_frags_free;
truesize -= nr_frags_free * frag_info->frag_stride;
}

skb = mlx5e_build_linear_skb(
rq, mxbuf->xdp.data_hard_start, rq->buff.frame0_sz,
mxbuf->xdp.data - mxbuf->xdp.data_hard_start,
Expand Down Expand Up @@ -2004,6 +2015,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
u32 byte_cnt = cqe_bcnt;
struct skb_shared_info *sinfo;
unsigned int truesize = 0;
u32 pg_consumed_bytes;
struct bpf_prog *prog;
struct sk_buff *skb;
u32 linear_frame_sz;
Expand Down Expand Up @@ -2057,7 +2069,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w

while (byte_cnt) {
/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);

if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
truesize += pg_consumed_bytes;
Expand All @@ -2073,10 +2085,15 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
}

if (prog) {
u8 nr_frags_free, old_nr_frags = sinfo->nr_frags;
u32 len;

if (mlx5e_xdp_handle(rq, prog, mxbuf)) {
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
struct mlx5e_frag_page *pfp;

frag_page -= old_nr_frags - sinfo->nr_frags;

for (pfp = head_page; pfp < frag_page; pfp++)
pfp->frags++;

Expand All @@ -2087,9 +2104,22 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
return NULL; /* page/packet was consumed by XDP */
}

len = mxbuf->xdp.data_end - mxbuf->xdp.data;

nr_frags_free = old_nr_frags - sinfo->nr_frags;
if (unlikely(nr_frags_free)) {
	/* The XDP program released frags; step the page cursor back so
	 * the released pages are no longer treated as part of the packet.
	 */
	frag_page -= nr_frags_free;

	/* The last frag is always freed first and only accounted
	 * pg_consumed_bytes (value left by the final fill-loop iteration).
	 * Each of the remaining (nr_frags_free - 1) released frags is
	 * accounted as one full page. Subtract exactly that amount: the
	 * multiplier must be the number of frags, not a running counter,
	 * otherwise truesize is over-subtracted once 3+ frags are freed.
	 */
	truesize -= ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)) +
		    (nr_frags_free - 1) *
		    ALIGN(PAGE_SIZE, BIT(rq->mpwqe.log_stride_sz));
}

skb = mlx5e_build_linear_skb(
rq, mxbuf->xdp.data_hard_start, linear_frame_sz,
mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0,
mxbuf->xdp.data - mxbuf->xdp.data_hard_start, len,
mxbuf->xdp.data - mxbuf->xdp.data_meta);
if (unlikely(!skb)) {
mlx5e_page_release_fragmented(rq->page_pool,
Expand All @@ -2114,8 +2144,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
do
pagep->frags++;
while (++pagep < frag_page);

headlen = min_t(u16, MLX5E_RX_MAX_HEAD - len, sinfo->xdp_frags_size);
__pskb_pull_tail(skb, headlen);
}
__pskb_pull_tail(skb, headlen);
} else {
dma_addr_t addr;

Expand Down
21 changes: 18 additions & 3 deletions include/net/xdp_sock_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
return ret;
}

static inline void xsk_buff_del_tail(struct xdp_buff *tail)
static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);

list_del(&xskb->list_node);
}

/* Return the xdp_buff of the entry at the front of the pool's xskb_list.
 * @first is only used to reach the owning pool. list_first_entry() performs
 * no emptiness check, so callers presumably guarantee that at least one frag
 * is queued - confirm against call sites.
 */
static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
{
	struct xdp_buff_xsk *first_xskb = container_of(first, struct xdp_buff_xsk, xdp);
	struct list_head *frags = &first_xskb->pool->xskb_list;
	struct xdp_buff_xsk *head;

	head = list_first_entry(frags, struct xdp_buff_xsk, list_node);

	return &head->xdp;
}

static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
{
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
Expand Down Expand Up @@ -389,8 +399,13 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
return NULL;
}

static inline void xsk_buff_del_tail(struct xdp_buff *tail)
static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{
}

/* Stub variant of xsk_buff_get_head(): ignores @first and always returns NULL.
 * NOTE(review): presumably the version used when AF_XDP socket support is
 * compiled out - confirm against the enclosing preprocessor conditional.
 */
static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
{
return NULL;
}

static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
Expand Down
13 changes: 13 additions & 0 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -12235,6 +12235,7 @@ enum special_kfunc_type {
KF_bpf_dynptr_from_skb,
KF_bpf_dynptr_from_xdp,
KF_bpf_dynptr_from_skb_meta,
KF_bpf_xdp_pull_data,
KF_bpf_dynptr_slice,
KF_bpf_dynptr_slice_rdwr,
KF_bpf_dynptr_clone,
Expand Down Expand Up @@ -12285,10 +12286,12 @@ BTF_ID(func, bpf_rbtree_right)
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_from_skb_meta)
BTF_ID(func, bpf_xdp_pull_data)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
Expand Down Expand Up @@ -12358,6 +12361,11 @@ static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
}

static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
}

static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
struct bpf_kfunc_call_arg_meta *meta,
Expand Down Expand Up @@ -14077,6 +14085,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}

if (is_kfunc_pkt_changing(&meta))
clear_all_pkt_pointers(env);

nargs = btf_type_vlen(meta.func_proto);
args = (const struct btf_param *)(meta.func_proto + 1);
for (i = 0; i < nargs; i++) {
Expand Down Expand Up @@ -17798,6 +17809,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
*/
if (ret == 0 && is_kfunc_sleepable(&meta))
mark_subprog_might_sleep(env, t);
if (ret == 0 && is_kfunc_pkt_changing(&meta))
mark_subprog_changes_pkt_data(env, t);
}
return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);

Expand Down
9 changes: 5 additions & 4 deletions net/bpf/test_run.c
Original file line number Diff line number Diff line change
Expand Up @@ -1207,8 +1207,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
{
bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES);
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
u32 retval = 0, duration, max_data_sz, data_sz;
u32 batch_size = kattr->test.batch_size;
u32 retval = 0, duration, max_data_sz;
u32 size = kattr->test.data_size_in;
u32 headroom = XDP_PACKET_HEADROOM;
u32 repeat = kattr->test.repeat;
Expand Down Expand Up @@ -1246,7 +1246,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,

if (ctx) {
/* There can't be user provided data before the meta data */
if (ctx->data_meta || ctx->data_end != size ||
if (ctx->data_meta || ctx->data_end > size ||
ctx->data > ctx->data_end ||
unlikely(xdp_metalen_invalid(ctx->data)) ||
(do_live && (kattr->test.data_out || kattr->test.ctx_out)))
Expand All @@ -1256,11 +1256,12 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
}

max_data_sz = PAGE_SIZE - headroom - tailroom;
if (size > max_data_sz) {
data_sz = (ctx && ctx->data_end < max_data_sz) ? ctx->data_end : max_data_sz;
if (size > data_sz) {
/* disallow live data mode for jumbo frames */
if (do_live)
goto free_ctx;
size = max_data_sz;
size = data_sz;
}

data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
Expand Down
104 changes: 93 additions & 11 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -4153,34 +4153,44 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
return 0;
}

static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, bool tail,
enum xdp_mem_type mem_type, bool release)
{
struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
xsk_buff_get_head(xdp);

if (release) {
xsk_buff_del_tail(zc_frag);
xsk_buff_del_frag(zc_frag);
__xdp_return(0, mem_type, false, zc_frag);
} else {
zc_frag->data_end -= shrink;
if (tail)
zc_frag->data_end -= shrink;
else
zc_frag->data += shrink;
}
}

static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
int shrink)
int shrink, bool tail)
{
enum xdp_mem_type mem_type = xdp->rxq->mem.type;
bool release = skb_frag_size(frag) == shrink;

if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
bpf_xdp_shrink_data_zc(xdp, shrink, tail, mem_type, release);
goto out;
}

if (release)
__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);

out:
if (!release) {
if (!tail)
skb_frag_off_add(frag, shrink);
skb_frag_size_sub(frag, shrink);
}

return release;
}

Expand All @@ -4198,12 +4208,8 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)

len_free += shrink;
offset -= shrink;
if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
n_frags_free++;
} else {
skb_frag_size_sub(frag, shrink);
break;
}
}
sinfo->nr_frags -= n_frags_free;
sinfo->xdp_frags_size -= len_free;
Expand Down Expand Up @@ -12206,6 +12212,81 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
return 0;
}

/**
 * bpf_xdp_pull_data() - Move non-linear xdp data into the linear area.
 * @x: &xdp_md associated with the XDP buffer
 * @len: number of bytes, counted from &xdp_md->data, that must be directly
 *       accessible in the linear part
 * @flags: reserved for future use, must be zero
 *
 * When the XDP buffer behind @x is non-linear and fewer than @len bytes sit
 * in the linear data area, copy the missing bytes out of the leading frags
 * to the end of the linear data. Frags that are drained completely are
 * released and removed; a partially drained frag is trimmed from its front.
 *
 * Direct packet access reads and writes linear XDP data through packet
 * pointers (i.e., &xdp_md->data + offsets). An eBPF program that wants to
 * directly access data which may live in the non-linear area calls this
 * kfunc first so that the data is available in the linear area.
 *
 * The kfunc can also be combined with bpf_xdp_adjust_head() to decapsulate
 * headers located in the non-linear data area.
 *
 * A call to this kfunc may change the underlying packet buffer. Therefore,
 * at load time, every pointer check previously performed by the verifier is
 * invalidated and must be redone when the kfunc is used together with
 * direct packet access.
 *
 * Return:
 * * %0 - success, including the case where @len bytes are already linear
 * * %-EINVAL - non-zero @flags, @len larger than the total buffer length,
 *              or @len bytes do not fit before the end of the linear buffer
 */
__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len, u64 flags)
{
	struct xdp_buff *xdp = (struct xdp_buff *)x;
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	void *hard_end = xdp_data_hard_end(xdp);
	void *new_end = xdp->data + len;
	int idx = 0, released = 0, pulled = 0;
	int remaining;

	if (flags || unlikely(len > xdp_get_buff_len(xdp)))
		return -EINVAL;

	/* Reject a wrapped pointer and a request that overruns the buffer. */
	if (unlikely(new_end < xdp->data))
		return -EINVAL;
	if (unlikely(new_end > hard_end))
		return -EINVAL;

	/* Nothing to do when the requested range is already linear. */
	remaining = new_end - xdp->data_end;
	if (remaining <= 0)
		return 0;

	/* Drain frags front to back; data_end itself is only moved once the
	 * whole copy is done, so append at data_end + bytes pulled so far.
	 */
	while (idx < sinfo->nr_frags && remaining) {
		skb_frag_t *frag = &sinfo->frags[idx++];
		u32 chunk = min_t(u32, remaining, skb_frag_size(frag));

		memcpy(xdp->data_end + pulled, skb_frag_address(frag), chunk);

		pulled += chunk;
		remaining -= chunk;
		if (bpf_xdp_shrink_data(xdp, frag, chunk, false))
			released++;
	}

	if (unlikely(released)) {
		int kept = sinfo->nr_frags - released;

		/* Close the gap left by the fully consumed leading frags. */
		memmove(sinfo->frags, &sinfo->frags[released],
			kept * sizeof(skb_frag_t));

		sinfo->nr_frags = kept;

		if (!kept)
			xdp_buff_clear_frags_flag(xdp);
	}

	sinfo->xdp_frags_size -= pulled;
	xdp->data_end = new_end;

	return 0;
}

__bpf_kfunc_end_defs();

int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
Expand Down Expand Up @@ -12233,6 +12314,7 @@ BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)

BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
BTF_ID_FLAGS(func, bpf_xdp_pull_data)
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)

BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,7 @@ void test_xdp_context_test_run(void)
/* Meta data must be 255 bytes or smaller */
test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);

/* Total size of data must match data_end - data_meta */
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
sizeof(data) - 1, 0, 0, 0);
/* Total size of data must be data_end - data_meta or larger */
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
sizeof(data) + 1, 0, 0, 0);

Expand Down
Loading
Loading