@@ -4153,34 +4153,45 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
 	return 0;
 }
 
-static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
-				   enum xdp_mem_type mem_type, bool release)
+static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+					       bool tail, bool release)
 {
-	struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+	struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
+					  xsk_buff_get_head(xdp);
 
 	if (release) {
-		xsk_buff_del_tail(zc_frag);
-		__xdp_return(0, mem_type, false, zc_frag);
+		xsk_buff_del_frag(zc_frag);
 	} else {
-		zc_frag->data_end -= shrink;
+		if (tail)
+			zc_frag->data_end -= shrink;
+		else
+			zc_frag->data += shrink;
 	}
+
+	return zc_frag;
 }
 
 static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
-				int shrink)
+				int shrink, bool tail)
 {
 	enum xdp_mem_type mem_type = xdp->rxq->mem.type;
 	bool release = skb_frag_size(frag) == shrink;
+	netmem_ref netmem = skb_frag_netmem(frag);
+	struct xdp_buff *zc_frag = NULL;
 
 	if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
-		bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
-		goto out;
+		netmem = 0;
+		zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
 	}
 
-	if (release)
-		__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
+	if (release) {
+		__xdp_return(netmem, mem_type, false, zc_frag);
+	} else {
+		if (!tail)
+			skb_frag_off_add(frag, shrink);
+		skb_frag_size_sub(frag, shrink);
+	}
 
-out:
 	return release;
 }
 
@@ -4198,18 +4209,15 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
 
 		len_free += shrink;
 		offset -= shrink;
-		if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
+		if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
 			n_frags_free++;
-		} else {
-			skb_frag_size_sub(frag, shrink);
-			break;
-		}
 	}
 	sinfo->nr_frags -= n_frags_free;
 	sinfo->xdp_frags_size -= len_free;
 
 	if (unlikely(!sinfo->nr_frags)) {
 		xdp_buff_clear_frags_flag(xdp);
+		xdp_buff_clear_frag_pfmemalloc(xdp);
 		xdp->data_end -= offset;
 	}
 
@@ -12210,6 +12218,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
 	return 0;
 }
 
+/**
+ * bpf_xdp_pull_data() - Pull in non-linear xdp data.
+ * @x: &xdp_md associated with the XDP buffer
+ * @len: length of data to be made directly accessible in the linear part
+ *
+ * Pull in data in case the XDP buffer associated with @x is non-linear and
+ * not all of the first @len bytes are in the linear data area.
+ *
+ * Direct packet access allows reading and writing linear XDP data through
+ * packet pointers (i.e., &xdp_md->data + offsets). The amount of data that
+ * ends up in the linear part of the xdp_buff depends on the NIC and its
+ * configuration. When a frag-capable XDP program wants to directly access
+ * headers that may be in the non-linear area, call this kfunc to make sure
+ * the data is available in the linear area. Alternatively, use dynptr or
+ * bpf_xdp_{load,store}_bytes() to access the data without pulling it.
+ *
+ * This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
+ * headers that reside in the non-linear data area.
+ *
+ * A call to this kfunc may reduce headroom. If there is not enough tailroom
+ * in the linear data area, metadata and data will be shifted down.
+ *
+ * A call to this kfunc may change the buffer geometry. Therefore, all
+ * checks on packet pointers previously done by the verifier are invalidated
+ * and must be performed again if the kfunc is used in combination with
+ * direct packet access.
+ *
+ * Return:
+ * * %0 - success
+ * * %-EINVAL - invalid len
+ */
+__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
+{
+	struct xdp_buff *xdp = (struct xdp_buff *)x;
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i, delta, shift, headroom, tailroom, n_frags_free = 0;
+	void *data_hard_end = xdp_data_hard_end(xdp);
+	int data_len = xdp->data_end - xdp->data;
+	void *start;
+
+	if (len <= data_len)
+		return 0;
+
+	if (unlikely(len > xdp_get_buff_len(xdp)))
+		return -EINVAL;
+
+	start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
+
+	headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
+	tailroom = data_hard_end - xdp->data_end;
+
+	delta = len - data_len;
+	if (unlikely(delta > tailroom + headroom))
+		return -EINVAL;
+
+	shift = delta - tailroom;
+	if (shift > 0) {
+		memmove(start - shift, start, xdp->data_end - start);
+
+		xdp->data_meta -= shift;
+		xdp->data -= shift;
+		xdp->data_end -= shift;
+	}
+
+	for (i = 0; i < sinfo->nr_frags && delta; i++) {
+		skb_frag_t *frag = &sinfo->frags[i];
+		u32 shrink = min_t(u32, delta, skb_frag_size(frag));
+
+		memcpy(xdp->data_end, skb_frag_address(frag), shrink);
+
+		xdp->data_end += shrink;
+		sinfo->xdp_frags_size -= shrink;
+		delta -= shrink;
+		if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
+			n_frags_free++;
+	}
+
+	if (unlikely(n_frags_free)) {
+		memmove(sinfo->frags, sinfo->frags + n_frags_free,
+			(sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
+
+		sinfo->nr_frags -= n_frags_free;
+
+		if (!sinfo->nr_frags) {
+			xdp_buff_clear_frags_flag(xdp);
+			xdp_buff_clear_frag_pfmemalloc(xdp);
+		}
+	}
+
+	return 0;
+}
+
 __bpf_kfunc_end_defs();
 
 int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12237,6 +12337,7 @@ BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
 
 BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
 BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_ID_FLAGS(func, bpf_xdp_pull_data)
 BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
 
 BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
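
For context only (this is not part of the diff): below is a rough sketch of how a frag-aware XDP program might call the new kfunc. The bpf_xdp_pull_data() prototype is taken from the patch above; the program name, section name, and header-parsing logic are illustrative assumptions. The packet pointers are reloaded and re-checked after the call, since the pull may move data and invalidates earlier verifier checks, as the doc comment explains.

/* Hypothetical usage sketch (not part of this patch): pull the Ethernet and
 * IPv4 headers into the linear area before parsing them with direct packet
 * access in a frag-capable XDP program.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

/* kfunc declaration; the prototype follows the patch above */
extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym;

SEC("xdp.frags")
int pull_ip_header(struct xdp_md *ctx)
{
	__u32 want = sizeof(struct ethhdr) + sizeof(struct iphdr);
	void *data, *data_end;
	struct ethhdr *eth;
	struct iphdr *iph;

	/* Ensure the first 'want' bytes are in the linear area; the kfunc
	 * returns -EINVAL if the packet is shorter than 'want' or if there
	 * is not enough head/tailroom to grow the linear part.
	 */
	if (bpf_xdp_pull_data(ctx, want))
		return XDP_DROP;

	/* The pull may have moved data, so reload and re-check the packet
	 * pointers before dereferencing them.
	 */
	data = (void *)(long)ctx->data;
	data_end = (void *)(long)ctx->data_end;

	eth = data;
	if ((void *)(eth + 1) > data_end)
		return XDP_DROP;
	if (eth->h_proto != bpf_htons(ETH_P_IP))
		return XDP_PASS;

	iph = (void *)(eth + 1);
	if ((void *)(iph + 1) > data_end)
		return XDP_DROP;

	/* ... headers can now be read and written via direct packet access ... */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";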