Skip to content

Commit 55d5a51

Browse files
author
Martin KaFai Lau
committed
Merge branch 'bpf-next/xdp_pull_data' into 'bpf-next/net'
Merge the xdp_pull_data stable branch into the net branch. No conflict. Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
2 parents 2dfd8b8 + 5000380 commit 55d5a51

File tree

9 files changed

+479
-52
lines changed

9 files changed

+479
-52
lines changed

include/net/xdp.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,11 @@ static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
115115
xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
116116
}
117117

118+
static __always_inline void xdp_buff_clear_frag_pfmemalloc(struct xdp_buff *xdp)
119+
{
120+
xdp->flags &= ~XDP_FLAGS_FRAGS_PF_MEMALLOC;
121+
}
122+
118123
static __always_inline void
119124
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
120125
{

include/net/xdp_sock_drv.h

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
160160
return ret;
161161
}
162162

163-
static inline void xsk_buff_del_tail(struct xdp_buff *tail)
163+
static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
164164
{
165-
struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
165+
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
166166

167167
list_del(&xskb->list_node);
168168
}
169169

170+
static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
171+
{
172+
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
173+
struct xdp_buff_xsk *frag;
174+
175+
frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
176+
list_node);
177+
return &frag->xdp;
178+
}
179+
170180
static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
171181
{
172182
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
@@ -389,8 +399,13 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
389399
return NULL;
390400
}
391401

392-
static inline void xsk_buff_del_tail(struct xdp_buff *tail)
402+
static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
403+
{
404+
}
405+
406+
static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
393407
{
408+
return NULL;
394409
}
395410

396411
static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)

kernel/bpf/verifier.c

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12235,6 +12235,7 @@ enum special_kfunc_type {
1223512235
KF_bpf_dynptr_from_skb,
1223612236
KF_bpf_dynptr_from_xdp,
1223712237
KF_bpf_dynptr_from_skb_meta,
12238+
KF_bpf_xdp_pull_data,
1223812239
KF_bpf_dynptr_slice,
1223912240
KF_bpf_dynptr_slice_rdwr,
1224012241
KF_bpf_dynptr_clone,
@@ -12285,10 +12286,12 @@ BTF_ID(func, bpf_rbtree_right)
1228512286
BTF_ID(func, bpf_dynptr_from_skb)
1228612287
BTF_ID(func, bpf_dynptr_from_xdp)
1228712288
BTF_ID(func, bpf_dynptr_from_skb_meta)
12289+
BTF_ID(func, bpf_xdp_pull_data)
1228812290
#else
1228912291
BTF_ID_UNUSED
1229012292
BTF_ID_UNUSED
1229112293
BTF_ID_UNUSED
12294+
BTF_ID_UNUSED
1229212295
#endif
1229312296
BTF_ID(func, bpf_dynptr_slice)
1229412297
BTF_ID(func, bpf_dynptr_slice_rdwr)
@@ -12358,6 +12361,11 @@ static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
1235812361
return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
1235912362
}
1236012363

12364+
static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
12365+
{
12366+
return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
12367+
}
12368+
1236112369
static enum kfunc_ptr_arg_type
1236212370
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
1236312371
struct bpf_kfunc_call_arg_meta *meta,
@@ -14077,6 +14085,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
1407714085
}
1407814086
}
1407914087

14088+
if (is_kfunc_pkt_changing(&meta))
14089+
clear_all_pkt_pointers(env);
14090+
1408014091
nargs = btf_type_vlen(meta.func_proto);
1408114092
args = (const struct btf_param *)(meta.func_proto + 1);
1408214093
for (i = 0; i < nargs; i++) {
@@ -17794,6 +17805,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
1779417805
*/
1779517806
if (ret == 0 && is_kfunc_sleepable(&meta))
1779617807
mark_subprog_might_sleep(env, t);
17808+
if (ret == 0 && is_kfunc_pkt_changing(&meta))
17809+
mark_subprog_changes_pkt_data(env, t);
1779717810
}
1779817811
return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
1779917812

net/bpf/test_run.c

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -665,7 +665,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
665665
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
666666
void *data;
667667

668-
if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom)
668+
if (user_size > PAGE_SIZE - headroom - tailroom)
669669
return ERR_PTR(-EINVAL);
670670

671671
size = SKB_DATA_ALIGN(size);
@@ -1001,6 +1001,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
10011001
kattr->test.cpu || kattr->test.batch_size)
10021002
return -EINVAL;
10031003

1004+
if (size < ETH_HLEN)
1005+
return -EINVAL;
1006+
10041007
data = bpf_test_init(kattr, kattr->test.data_size_in,
10051008
size, NET_SKB_PAD + NET_IP_ALIGN,
10061009
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
@@ -1207,9 +1210,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
12071210
{
12081211
bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES);
12091212
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1213+
u32 retval = 0, meta_sz = 0, duration, max_linear_sz, size;
1214+
u32 linear_sz = kattr->test.data_size_in;
12101215
u32 batch_size = kattr->test.batch_size;
1211-
u32 retval = 0, duration, max_data_sz;
1212-
u32 size = kattr->test.data_size_in;
12131216
u32 headroom = XDP_PACKET_HEADROOM;
12141217
u32 repeat = kattr->test.repeat;
12151218
struct netdev_rx_queue *rxqueue;
@@ -1246,39 +1249,45 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
12461249

12471250
if (ctx) {
12481251
/* There can't be user provided data before the meta data */
1249-
if (ctx->data_meta || ctx->data_end != size ||
1252+
if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in ||
12501253
ctx->data > ctx->data_end ||
12511254
unlikely(xdp_metalen_invalid(ctx->data)) ||
12521255
(do_live && (kattr->test.data_out || kattr->test.ctx_out)))
12531256
goto free_ctx;
12541257
/* Meta data is allocated from the headroom */
12551258
headroom -= ctx->data;
1256-
}
12571259

1258-
max_data_sz = PAGE_SIZE - headroom - tailroom;
1259-
if (size > max_data_sz) {
1260-
/* disallow live data mode for jumbo frames */
1261-
if (do_live)
1262-
goto free_ctx;
1263-
size = max_data_sz;
1260+
meta_sz = ctx->data;
1261+
linear_sz = ctx->data_end;
12641262
}
12651263

1266-
data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
1264+
max_linear_sz = PAGE_SIZE - headroom - tailroom;
1265+
linear_sz = min_t(u32, linear_sz, max_linear_sz);
1266+
1267+
/* disallow live data mode for jumbo frames */
1268+
if (do_live && kattr->test.data_size_in > linear_sz)
1269+
goto free_ctx;
1270+
1271+
if (kattr->test.data_size_in - meta_sz < ETH_HLEN)
1272+
return -EINVAL;
1273+
1274+
data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom);
12671275
if (IS_ERR(data)) {
12681276
ret = PTR_ERR(data);
12691277
goto free_ctx;
12701278
}
12711279

12721280
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
1273-
rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
1281+
rxqueue->xdp_rxq.frag_size = PAGE_SIZE;
12741282
xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
1275-
xdp_prepare_buff(&xdp, data, headroom, size, true);
1283+
xdp_prepare_buff(&xdp, data, headroom, linear_sz, true);
12761284
sinfo = xdp_get_shared_info_from_buff(&xdp);
12771285

12781286
ret = xdp_convert_md_to_buff(ctx, &xdp);
12791287
if (ret)
12801288
goto free_data;
12811289

1290+
size = linear_sz;
12821291
if (unlikely(kattr->test.data_size_in > size)) {
12831292
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
12841293

net/core/filter.c

Lines changed: 118 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4153,34 +4153,45 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
41534153
return 0;
41544154
}
41554155

4156-
static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
4157-
enum xdp_mem_type mem_type, bool release)
4156+
static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
4157+
bool tail, bool release)
41584158
{
4159-
struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
4159+
struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
4160+
xsk_buff_get_head(xdp);
41604161

41614162
if (release) {
4162-
xsk_buff_del_tail(zc_frag);
4163-
__xdp_return(0, mem_type, false, zc_frag);
4163+
xsk_buff_del_frag(zc_frag);
41644164
} else {
4165-
zc_frag->data_end -= shrink;
4165+
if (tail)
4166+
zc_frag->data_end -= shrink;
4167+
else
4168+
zc_frag->data += shrink;
41664169
}
4170+
4171+
return zc_frag;
41674172
}
41684173

41694174
static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
4170-
int shrink)
4175+
int shrink, bool tail)
41714176
{
41724177
enum xdp_mem_type mem_type = xdp->rxq->mem.type;
41734178
bool release = skb_frag_size(frag) == shrink;
4179+
netmem_ref netmem = skb_frag_netmem(frag);
4180+
struct xdp_buff *zc_frag = NULL;
41744181

41754182
if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
4176-
bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
4177-
goto out;
4183+
netmem = 0;
4184+
zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
41784185
}
41794186

4180-
if (release)
4181-
__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
4187+
if (release) {
4188+
__xdp_return(netmem, mem_type, false, zc_frag);
4189+
} else {
4190+
if (!tail)
4191+
skb_frag_off_add(frag, shrink);
4192+
skb_frag_size_sub(frag, shrink);
4193+
}
41824194

4183-
out:
41844195
return release;
41854196
}
41864197

@@ -4198,18 +4209,15 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
41984209

41994210
len_free += shrink;
42004211
offset -= shrink;
4201-
if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
4212+
if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
42024213
n_frags_free++;
4203-
} else {
4204-
skb_frag_size_sub(frag, shrink);
4205-
break;
4206-
}
42074214
}
42084215
sinfo->nr_frags -= n_frags_free;
42094216
sinfo->xdp_frags_size -= len_free;
42104217

42114218
if (unlikely(!sinfo->nr_frags)) {
42124219
xdp_buff_clear_frags_flag(xdp);
4220+
xdp_buff_clear_frag_pfmemalloc(xdp);
42134221
xdp->data_end -= offset;
42144222
}
42154223

@@ -12206,6 +12214,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
1220612214
return 0;
1220712215
}
1220812216

12217+
/**
12218+
* bpf_xdp_pull_data() - Pull in non-linear xdp data.
12219+
* @x: &xdp_md associated with the XDP buffer
12220+
* @len: length of data to be made directly accessible in the linear part
12221+
*
12222+
* Pull in data in case the XDP buffer associated with @x is non-linear and
12223+
* not all @len bytes are in the linear data area.
12224+
*
12225+
* Direct packet access allows reading and writing linear XDP data through
12226+
* packet pointers (i.e., &xdp_md->data + offsets). The amount of data which
12227+
* ends up in the linear part of the xdp_buff depends on the NIC and its
12228+
* configuration. When a frag-capable XDP program wants to directly access
12229+
* headers that may be in the non-linear area, call this kfunc to make sure
12230+
* the data is available in the linear area. Alternatively, use dynptr or
12231+
* bpf_xdp_{load,store}_bytes() to access data without pulling.
12232+
*
12233+
* This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
12234+
* headers in the non-linear data area.
12235+
*
12236+
* A call to this kfunc may reduce headroom. If there is not enough tailroom
12237+
* in the linear data area, metadata and data are shifted into the headroom.
12238+
*
12239+
* A call to this kfunc may change the buffer geometry.
12240+
* Therefore, at load time, all checks on pointers previously done by the
12241+
* verifier are invalidated and must be performed again, if the kfunc is used
12242+
* in combination with direct packet access.
12243+
*
12244+
* Return:
12245+
* * %0 - success
12246+
* * %-EINVAL - @len is larger than the packet or than the linear area can hold
12247+
*/
12248+
__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
12249+
{
12250+
struct xdp_buff *xdp = (struct xdp_buff *)x;
12251+
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
12252+
int i, delta, shift, headroom, tailroom, n_frags_free = 0;
12253+
void *data_hard_end = xdp_data_hard_end(xdp);
12254+
int data_len = xdp->data_end - xdp->data;
12255+
void *start;
12256+
12257+
if (len <= data_len)
12258+
return 0;
12259+
12260+
if (unlikely(len > xdp_get_buff_len(xdp)))
12261+
return -EINVAL;
12262+
12263+
start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
12264+
12265+
headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
12266+
tailroom = data_hard_end - xdp->data_end;
12267+
12268+
delta = len - data_len;
12269+
if (unlikely(delta > tailroom + headroom))
12270+
return -EINVAL;
12271+
12272+
shift = delta - tailroom;
12273+
if (shift > 0) {
12274+
memmove(start - shift, start, xdp->data_end - start);
12275+
12276+
xdp->data_meta -= shift;
12277+
xdp->data -= shift;
12278+
xdp->data_end -= shift;
12279+
}
12280+
12281+
for (i = 0; i < sinfo->nr_frags && delta; i++) {
12282+
skb_frag_t *frag = &sinfo->frags[i];
12283+
u32 shrink = min_t(u32, delta, skb_frag_size(frag));
12284+
12285+
memcpy(xdp->data_end, skb_frag_address(frag), shrink);
12286+
12287+
xdp->data_end += shrink;
12288+
sinfo->xdp_frags_size -= shrink;
12289+
delta -= shrink;
12290+
if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
12291+
n_frags_free++;
12292+
}
12293+
12294+
if (unlikely(n_frags_free)) {
12295+
memmove(sinfo->frags, sinfo->frags + n_frags_free,
12296+
(sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
12297+
12298+
sinfo->nr_frags -= n_frags_free;
12299+
12300+
if (!sinfo->nr_frags) {
12301+
xdp_buff_clear_frags_flag(xdp);
12302+
xdp_buff_clear_frag_pfmemalloc(xdp);
12303+
}
12304+
}
12305+
12306+
return 0;
12307+
}
12308+
1220912309
__bpf_kfunc_end_defs();
1221012310

1221112311
int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12233,6 +12333,7 @@ BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
1223312333

1223412334
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
1223512335
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
12336+
BTF_ID_FLAGS(func, bpf_xdp_pull_data)
1223612337
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
1223712338

1223812339
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)

tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -97,9 +97,7 @@ void test_xdp_context_test_run(void)
9797
/* Meta data must be 255 bytes or smaller */
9898
test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
9999

100-
/* Total size of data must match data_end - data_meta */
101-
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
102-
sizeof(data) - 1, 0, 0, 0);
100+
/* Total size of data must be data_end - data_meta or larger */
103101
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
104102
sizeof(data) + 1, 0, 0, 0);
105103

0 commit comments

Comments
 (0)