Skip to content

Commit 4dce1a0

Browse files
ameryhung authored and Martin KaFai Lau committed
bpf: Support pulling non-linear xdp data
Add kfunc, bpf_xdp_pull_data(), to support pulling data from xdp fragments. Similar to bpf_skb_pull_data(), bpf_xdp_pull_data() makes the first len bytes of data directly readable and writable in bpf programs. If the "len" argument is larger than the linear data size, data in fragments will be copied to the linear data area when there is enough room. Specifically, the kfunc will try to use the tailroom first. When the tailroom is not enough, metadata and data will be shifted down to make room for pulling data. A use case of the kfunc is to decapsulate headers residing in xdp fragments. It is possible for a NIC driver to place headers in xdp fragments. To keep using direct packet access for parsing and decapsulating headers, users can pull headers into the linear data area by calling bpf_xdp_pull_data() and then pop the header with bpf_xdp_adjust_head(). Signed-off-by: Amery Hung <[email protected]> Signed-off-by: Martin KaFai Lau <[email protected]> Reviewed-by: Jakub Kicinski <[email protected]> Link: https://patch.msgid.link/[email protected]
1 parent dea1526 commit 4dce1a0

File tree

1 file changed

+93
-0
lines changed

1 file changed

+93
-0
lines changed

net/core/filter.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12214,6 +12214,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
1221412214
return 0;
1221512215
}
1221612216

12217+
/**
12218+
* bpf_xdp_pull_data() - Pull in non-linear xdp data.
12219+
* @x: &xdp_md associated with the XDP buffer
12220+
* @len: length of data to be made directly accessible in the linear part
12221+
*
12222+
* Pull in data in case the XDP buffer associated with @x is non-linear and
12223+
* not all @len are in the linear data area.
12224+
*
12225+
* Direct packet access allows reading and writing linear XDP data through
12226+
* packet pointers (i.e., &xdp_md->data + offsets). The amount of data which
12227+
* ends up in the linear part of the xdp_buff depends on the NIC and its
12228+
* configuration. When a frag-capable XDP program wants to directly access
12229+
* headers that may be in the non-linear area, call this kfunc to make sure
12230+
* the data is available in the linear area. Alternatively, use dynptr or
12231+
* bpf_xdp_{load,store}_bytes() to access data without pulling.
12232+
*
12233+
* This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
12234+
* headers in the non-linear data area.
12235+
*
12236+
* A call to this kfunc may reduce headroom. If there is not enough tailroom
12237+
* in the linear data area, metadata and data will be shifted down.
12238+
*
12239+
* A call to this kfunc is susceptible to change the buffer geometry.
12240+
* Therefore, at load time, all checks on pointers previously done by the
12241+
* verifier are invalidated and must be performed again, if the kfunc is used
12242+
* in combination with direct packet access.
12243+
*
12244+
* Return:
12245+
* * %0 - success
12246+
* * %-EINVAL - invalid len
12247+
*/
12248+
__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
12249+
{
12250+
struct xdp_buff *xdp = (struct xdp_buff *)x;
12251+
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
12252+
int i, delta, shift, headroom, tailroom, n_frags_free = 0;
12253+
void *data_hard_end = xdp_data_hard_end(xdp);
12254+
int data_len = xdp->data_end - xdp->data;
12255+
void *start;
12256+
12257+
if (len <= data_len)
12258+
return 0;
12259+
12260+
if (unlikely(len > xdp_get_buff_len(xdp)))
12261+
return -EINVAL;
12262+
12263+
start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
12264+
12265+
headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
12266+
tailroom = data_hard_end - xdp->data_end;
12267+
12268+
delta = len - data_len;
12269+
if (unlikely(delta > tailroom + headroom))
12270+
return -EINVAL;
12271+
12272+
shift = delta - tailroom;
12273+
if (shift > 0) {
12274+
memmove(start - shift, start, xdp->data_end - start);
12275+
12276+
xdp->data_meta -= shift;
12277+
xdp->data -= shift;
12278+
xdp->data_end -= shift;
12279+
}
12280+
12281+
for (i = 0; i < sinfo->nr_frags && delta; i++) {
12282+
skb_frag_t *frag = &sinfo->frags[i];
12283+
u32 shrink = min_t(u32, delta, skb_frag_size(frag));
12284+
12285+
memcpy(xdp->data_end, skb_frag_address(frag), shrink);
12286+
12287+
xdp->data_end += shrink;
12288+
sinfo->xdp_frags_size -= shrink;
12289+
delta -= shrink;
12290+
if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
12291+
n_frags_free++;
12292+
}
12293+
12294+
if (unlikely(n_frags_free)) {
12295+
memmove(sinfo->frags, sinfo->frags + n_frags_free,
12296+
(sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
12297+
12298+
sinfo->nr_frags -= n_frags_free;
12299+
12300+
if (!sinfo->nr_frags) {
12301+
xdp_buff_clear_frags_flag(xdp);
12302+
xdp_buff_clear_frag_pfmemalloc(xdp);
12303+
}
12304+
}
12305+
12306+
return 0;
12307+
}
12308+
1221712309
__bpf_kfunc_end_defs();
1221812310

1221912311
int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12241,6 +12333,7 @@ BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
1224112333

1224212334
/*
 * kfunc ID set bpf_kfunc_check_set_xdp: lists bpf_dynptr_from_xdp and
 * bpf_xdp_pull_data, each as a BTF "func" ID with no flags given.
 */
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
1224312335
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
12336+
BTF_ID_FLAGS(func, bpf_xdp_pull_data)
1224412337
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
1224512338

1224612339
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)

0 commit comments

Comments
 (0)