Commit 34f033a

Author: Martin KaFai Lau
Merge branch 'bpf-next/xdp_pull_data' into 'bpf-next/master'
Merge the xdp_pull_data stable branch into the master branch. No conflict.

Signed-off-by: Martin KaFai Lau <[email protected]>
2 parents 8b52d09 + 5000380 commit 34f033a

File tree

9 files changed: +479 -52 lines changed


include/net/xdp.h

Lines changed: 5 additions & 0 deletions
@@ -115,6 +115,11 @@ static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
 	xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
 }
 
+static __always_inline void xdp_buff_clear_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+	xdp->flags &= ~XDP_FLAGS_FRAGS_PF_MEMALLOC;
+}
+
 static __always_inline void
 xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
 {

include/net/xdp_sock_drv.h

Lines changed: 18 additions & 3 deletions
@@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
 	return ret;
 }
 
-static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
 {
-	struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
+	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
 
 	list_del(&xskb->list_node);
 }
 
+static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
+{
+	struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
+	struct xdp_buff_xsk *frag;
+
+	frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
+				list_node);
+	return &frag->xdp;
+}
+
 static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
 {
 	struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
@@ -389,8 +399,13 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
 	return NULL;
 }
 
-static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
+{
+}
+
+static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
 {
+	return NULL;
 }
 
 static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)

kernel/bpf/verifier.c

Lines changed: 13 additions & 0 deletions
@@ -12234,6 +12234,7 @@ enum special_kfunc_type {
 	KF_bpf_dynptr_from_skb,
 	KF_bpf_dynptr_from_xdp,
 	KF_bpf_dynptr_from_skb_meta,
+	KF_bpf_xdp_pull_data,
 	KF_bpf_dynptr_slice,
 	KF_bpf_dynptr_slice_rdwr,
 	KF_bpf_dynptr_clone,
@@ -12286,10 +12287,12 @@ BTF_ID(func, bpf_rbtree_right)
 BTF_ID(func, bpf_dynptr_from_skb)
 BTF_ID(func, bpf_dynptr_from_xdp)
 BTF_ID(func, bpf_dynptr_from_skb_meta)
+BTF_ID(func, bpf_xdp_pull_data)
 #else
 BTF_ID_UNUSED
 BTF_ID_UNUSED
 BTF_ID_UNUSED
+BTF_ID_UNUSED
 #endif
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
@@ -12367,6 +12370,11 @@ static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
 }
 
+static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
+}
+
 static enum kfunc_ptr_arg_type
 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 		       struct bpf_kfunc_call_arg_meta *meta,
@@ -14127,6 +14135,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	}
 
+	if (is_kfunc_pkt_changing(&meta))
+		clear_all_pkt_pointers(env);
+
 	nargs = btf_type_vlen(meta.func_proto);
 	args = (const struct btf_param *)(meta.func_proto + 1);
 	for (i = 0; i < nargs; i++) {
@@ -17843,6 +17854,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
 		 */
 		if (ret == 0 && is_kfunc_sleepable(&meta))
 			mark_subprog_might_sleep(env, t);
+		if (ret == 0 && is_kfunc_pkt_changing(&meta))
+			mark_subprog_changes_pkt_data(env, t);
 	}
 	return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);

net/bpf/test_run.c

Lines changed: 23 additions & 14 deletions
@@ -665,7 +665,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
 	void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
 	void *data;
 
-	if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom)
+	if (user_size > PAGE_SIZE - headroom - tailroom)
 		return ERR_PTR(-EINVAL);
 
 	size = SKB_DATA_ALIGN(size);
@@ -1001,6 +1001,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	    kattr->test.cpu || kattr->test.batch_size)
 		return -EINVAL;
 
+	if (size < ETH_HLEN)
+		return -EINVAL;
+
 	data = bpf_test_init(kattr, kattr->test.data_size_in,
 			     size, NET_SKB_PAD + NET_IP_ALIGN,
 			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
@@ -1207,9 +1210,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 {
 	bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES);
 	u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	u32 retval = 0, meta_sz = 0, duration, max_linear_sz, size;
+	u32 linear_sz = kattr->test.data_size_in;
 	u32 batch_size = kattr->test.batch_size;
-	u32 retval = 0, duration, max_data_sz;
-	u32 size = kattr->test.data_size_in;
 	u32 headroom = XDP_PACKET_HEADROOM;
 	u32 repeat = kattr->test.repeat;
 	struct netdev_rx_queue *rxqueue;
@@ -1246,39 +1249,45 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 
 	if (ctx) {
 		/* There can't be user provided data before the meta data */
-		if (ctx->data_meta || ctx->data_end != size ||
+		if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in ||
 		    ctx->data > ctx->data_end ||
 		    unlikely(xdp_metalen_invalid(ctx->data)) ||
 		    (do_live && (kattr->test.data_out || kattr->test.ctx_out)))
 			goto free_ctx;
 		/* Meta data is allocated from the headroom */
 		headroom -= ctx->data;
-	}
 
-	max_data_sz = PAGE_SIZE - headroom - tailroom;
-	if (size > max_data_sz) {
-		/* disallow live data mode for jumbo frames */
-		if (do_live)
-			goto free_ctx;
-		size = max_data_sz;
+		meta_sz = ctx->data;
+		linear_sz = ctx->data_end;
 	}
 
-	data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
+	max_linear_sz = PAGE_SIZE - headroom - tailroom;
+	linear_sz = min_t(u32, linear_sz, max_linear_sz);
+
+	/* disallow live data mode for jumbo frames */
+	if (do_live && kattr->test.data_size_in > linear_sz)
+		goto free_ctx;
+
+	if (kattr->test.data_size_in - meta_sz < ETH_HLEN)
+		return -EINVAL;
+
+	data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom);
 	if (IS_ERR(data)) {
 		ret = PTR_ERR(data);
 		goto free_ctx;
 	}
 
 	rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
-	rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+	rxqueue->xdp_rxq.frag_size = PAGE_SIZE;
 	xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
-	xdp_prepare_buff(&xdp, data, headroom, size, true);
+	xdp_prepare_buff(&xdp, data, headroom, linear_sz, true);
 	sinfo = xdp_get_shared_info_from_buff(&xdp);
 
 	ret = xdp_convert_md_to_buff(ctx, &xdp);
 	if (ret)
 		goto free_data;
 
+	size = linear_sz;
 	if (unlikely(kattr->test.data_size_in > size)) {
 		void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
 
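For context, the relaxed ctx check above can be exercised from user space by passing a struct xdp_md ctx whose data_end is smaller than data_size_in, so only data_end bytes land in the linear area and the rest is placed in fragments. A minimal, hypothetical sketch (not part of this commit), assuming libbpf and a loaded frags-capable XDP program whose fd is prog_fd; the 4096-byte payload and 1024-byte linear size are arbitrary choices:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/bpf.h>

/* Hypothetical helper: run an XDP program on a multi-buffer test frame
 * by keeping only the first 1024 bytes in the linear area.
 */
static int run_xdp_mb_test(int prog_fd)
{
	__u8 pkt[4096] = {};			/* test payload; first bytes form the Ethernet header */
	struct xdp_md ctx = {
		.data = 0,			/* no metadata */
		.data_end = 1024,		/* linear size; must now be <= data_size_in, not equal */
	};
	LIBBPF_OPTS(bpf_prog_test_run_opts, opts,
		.data_in = pkt,
		.data_size_in = sizeof(pkt),	/* bytes beyond data_end become fragments */
		.ctx_in = &ctx,
		.ctx_size_in = sizeof(ctx),
		.repeat = 1,
	);

	return bpf_prog_test_run_opts(prog_fd, &opts);
}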

net/core/filter.c

Lines changed: 118 additions & 17 deletions
@@ -4153,34 +4153,45 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
 	return 0;
 }
 
-static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
-				   enum xdp_mem_type mem_type, bool release)
+static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+					       bool tail, bool release)
 {
-	struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+	struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
+					  xsk_buff_get_head(xdp);
 
 	if (release) {
-		xsk_buff_del_tail(zc_frag);
-		__xdp_return(0, mem_type, false, zc_frag);
+		xsk_buff_del_frag(zc_frag);
 	} else {
-		zc_frag->data_end -= shrink;
+		if (tail)
+			zc_frag->data_end -= shrink;
+		else
+			zc_frag->data += shrink;
 	}
+
+	return zc_frag;
 }
 
 static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
-				int shrink)
+				int shrink, bool tail)
 {
 	enum xdp_mem_type mem_type = xdp->rxq->mem.type;
 	bool release = skb_frag_size(frag) == shrink;
+	netmem_ref netmem = skb_frag_netmem(frag);
+	struct xdp_buff *zc_frag = NULL;
 
 	if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
-		bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
-		goto out;
+		netmem = 0;
+		zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
 	}
 
-	if (release)
-		__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
+	if (release) {
+		__xdp_return(netmem, mem_type, false, zc_frag);
+	} else {
+		if (!tail)
+			skb_frag_off_add(frag, shrink);
+		skb_frag_size_sub(frag, shrink);
+	}
 
-out:
 	return release;
 }
 
@@ -4198,18 +4209,15 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
 
 		len_free += shrink;
 		offset -= shrink;
-		if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
+		if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
 			n_frags_free++;
-		} else {
-			skb_frag_size_sub(frag, shrink);
-			break;
-		}
 	}
 	sinfo->nr_frags -= n_frags_free;
 	sinfo->xdp_frags_size -= len_free;
 
 	if (unlikely(!sinfo->nr_frags)) {
 		xdp_buff_clear_frags_flag(xdp);
+		xdp_buff_clear_frag_pfmemalloc(xdp);
 		xdp->data_end -= offset;
 	}
 
@@ -12210,6 +12218,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
 	return 0;
 }
 
+/**
+ * bpf_xdp_pull_data() - Pull in non-linear xdp data.
+ * @x: &xdp_md associated with the XDP buffer
+ * @len: length of data to be made directly accessible in the linear part
+ *
+ * Pull in data in case the XDP buffer associated with @x is non-linear and
+ * not all @len are in the linear data area.
+ *
+ * Direct packet access allows reading and writing linear XDP data through
+ * packet pointers (i.e., &xdp_md->data + offsets). The amount of data which
+ * ends up in the linear part of the xdp_buff depends on the NIC and its
+ * configuration. When a frag-capable XDP program wants to directly access
+ * headers that may be in the non-linear area, call this kfunc to make sure
+ * the data is available in the linear area. Alternatively, use dynptr or
+ * bpf_xdp_{load,store}_bytes() to access data without pulling.
+ *
+ * This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
+ * headers in the non-linear data area.
+ *
+ * A call to this kfunc may reduce headroom. If there is not enough tailroom
+ * in the linear data area, metadata and data will be shifted down.
+ *
+ * A call to this kfunc is susceptible to change the buffer geometry.
+ * Therefore, at load time, all checks on pointers previously done by the
+ * verifier are invalidated and must be performed again, if the kfunc is used
+ * in combination with direct packet access.
+ *
+ * Return:
+ * * %0       - success
+ * * %-EINVAL - invalid len
+ */
+__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
+{
+	struct xdp_buff *xdp = (struct xdp_buff *)x;
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i, delta, shift, headroom, tailroom, n_frags_free = 0;
+	void *data_hard_end = xdp_data_hard_end(xdp);
+	int data_len = xdp->data_end - xdp->data;
+	void *start;
+
+	if (len <= data_len)
+		return 0;
+
+	if (unlikely(len > xdp_get_buff_len(xdp)))
+		return -EINVAL;
+
+	start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
+
+	headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
+	tailroom = data_hard_end - xdp->data_end;
+
+	delta = len - data_len;
+	if (unlikely(delta > tailroom + headroom))
+		return -EINVAL;
+
+	shift = delta - tailroom;
+	if (shift > 0) {
+		memmove(start - shift, start, xdp->data_end - start);
+
+		xdp->data_meta -= shift;
+		xdp->data -= shift;
+		xdp->data_end -= shift;
+	}
+
+	for (i = 0; i < sinfo->nr_frags && delta; i++) {
+		skb_frag_t *frag = &sinfo->frags[i];
+		u32 shrink = min_t(u32, delta, skb_frag_size(frag));
+
+		memcpy(xdp->data_end, skb_frag_address(frag), shrink);
+
+		xdp->data_end += shrink;
+		sinfo->xdp_frags_size -= shrink;
+		delta -= shrink;
+		if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
+			n_frags_free++;
+	}
+
+	if (unlikely(n_frags_free)) {
+		memmove(sinfo->frags, sinfo->frags + n_frags_free,
+			(sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
+
+		sinfo->nr_frags -= n_frags_free;
+
+		if (!sinfo->nr_frags) {
+			xdp_buff_clear_frags_flag(xdp);
+			xdp_buff_clear_frag_pfmemalloc(xdp);
+		}
+	}
+
+	return 0;
+}
+
 __bpf_kfunc_end_defs();
 
 int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12237,6 +12337,7 @@ BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
 
 BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
 BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_ID_FLAGS(func, bpf_xdp_pull_data)
 BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
 
 BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
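To illustrate how the new kfunc is meant to be used from a program, here is a minimal, hypothetical BPF-side sketch (not part of this commit). The kfunc declaration would normally come from vmlinux.h or a kfunc header, and the 64-byte pull length is an arbitrary example; the re-check after the call reflects the verifier behavior added above (packet pointers are cleared after a packet-changing kfunc):

/* Hypothetical usage sketch, not part of this commit. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym;

SEC("xdp.frags")
int xdp_pull_example(struct xdp_md *ctx)
{
	void *data, *data_end;

	/* Make sure the first 64 bytes are in the linear area. */
	if (bpf_xdp_pull_data(ctx, 64))
		return XDP_DROP;

	/* The call may change the buffer geometry, so packet pointers
	 * derived before it are invalidated by the verifier; re-derive
	 * and re-check them before direct packet access.
	 */
	data = (void *)(long)ctx->data;
	data_end = (void *)(long)ctx->data_end;
	if (data + 64 > data_end)
		return XDP_DROP;

	/* ... parse headers via direct packet access ... */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";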

tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c

Lines changed: 1 addition & 3 deletions
@@ -97,9 +97,7 @@ void test_xdp_context_test_run(void)
 	/* Meta data must be 255 bytes or smaller */
 	test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
 
-	/* Total size of data must match data_end - data_meta */
-	test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
-			       sizeof(data) - 1, 0, 0, 0);
+	/* Total size of data must be data_end - data_meta or larger */
 	test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
 			       sizeof(data) + 1, 0, 0, 0);
 
