
Commit ca93a67

dtatulea authored and Kernel Patches Daemon committed
xdp: Delegate fast path return decision to page_pool
XDP uses the BPF_RI_F_RF_NO_DIRECT flag to mark contexts where it is not
allowed to do direct recycling, even though the direct flag was set by the
caller. This is confusing and can lead to races which are hard to detect [1].
Furthermore, the page_pool already contains an internal mechanism which checks
if it is safe to switch the direct flag from off to on.

This patch drops the use of the BPF_RI_F_RF_NO_DIRECT flag and always calls
the page_pool release with the direct flag set to false. The page_pool will
decide if it is safe to do direct recycling. This is not free, but it is worth
it to make the XDP code safer. The next paragraphs discuss the performance
impact.

Performance-wise, there are 3 cases to consider, looking from __xdp_return()
for the MEM_TYPE_PAGE_POOL case:

1) napi_direct == false:
   - Before: 1 comparison in __xdp_return() + call of page_pool_napi_local()
     from page_pool_put_unrefed_netmem().
   - After: Only one call to page_pool_napi_local().

2) napi_direct == true && BPF_RI_F_RF_NO_DIRECT:
   - Before: 2 comparisons in __xdp_return() + call of page_pool_napi_local()
     from page_pool_put_unrefed_netmem().
   - After: Only one call to page_pool_napi_local().

3) napi_direct == true && !BPF_RI_F_RF_NO_DIRECT:
   - Before: 2 comparisons in __xdp_return().
   - After: One call to page_pool_napi_local().

Cases 1 & 2 are the slower paths and can only gain, but they are slow anyway
so the gain is small. Case 3 is the fast path and is the one that has to be
considered more closely: the 2 comparisons from __xdp_return() are swapped for
the more expensive page_pool_napi_local() call.

The page_pool benchmark was used to compare the fast path with the newly added
NAPI-aware mode and measure [2] how expensive page_pool_napi_local() is:

bench_page_pool: time_bench_page_pool01_fast_path(): in_serving_softirq fast-path
bench_page_pool: Type:tasklet_page_pool01_fast_path Per elem: 15 cycles(tsc) 7.537 ns (step:0)
bench_page_pool: time_bench_page_pool04_napi_aware(): in_serving_softirq fast-path
bench_page_pool: Type:tasklet_page_pool04_napi_aware Per elem: 20 cycles(tsc) 10.490 ns (step:0)

... and the slow path for reference:

bench_page_pool: time_bench_page_pool02_ptr_ring(): in_serving_softirq fast-path
bench_page_pool: Type:tasklet_page_pool02_ptr_ring Per elem: 30 cycles(tsc) 15.395 ns (step:0)

So the impact is small in the fast path, but not negligible. One thing to
consider is that the comparisons from napi_direct are dropped, which means the
real impact will be smaller than the benchmark measurements suggest.

[1] Commit 2b986b9 ("bpf, cpumap: Disable page_pool direct xdp_return need larger scope")
[2] Intel Xeon Platinum 8580

Signed-off-by: Dragos Tatulea <[email protected]>
1 parent 6e47934 commit ca93a67
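The "internal mechanism" referenced in the commit message is the NAPI-awareness check on the page_pool release path, which can upgrade a non-direct return to a direct one when that is provably safe. Below is a simplified sketch of that logic, paraphrased from net/core/page_pool.c; the names match upstream, but this is an illustration rather than the verbatim implementation:

/* Paraphrased sketch of the page_pool release path (net/core/page_pool.c);
 * simplified, not the exact upstream code.
 *
 * Direct (lockless) recycling into the pool's per-NAPI cache is only safe
 * when the caller runs in the softirq/NAPI context that owns the pool.
 */
static bool page_pool_napi_local(const struct page_pool *pool)
{
	const struct napi_struct *napi;
	u32 cpuid;

	/* Never allow direct recycling outside of softirq context. */
	if (unlikely(!in_softirq()))
		return false;

	cpuid = smp_processor_id();

	/* Per-CPU pools (e.g. cpumap) record their owning CPU. */
	if (READ_ONCE(pool->cpuid) == cpuid)
		return true;

	/* Otherwise, allow it only if the pool's NAPI instance is
	 * currently scheduled on this CPU.
	 */
	napi = READ_ONCE(pool->p.napi);
	return napi && READ_ONCE(napi->list_owner) == cpuid;
}

/* The release entry point upgrades allow_direct when safe, which is why
 * __xdp_return() can now always pass false:
 */
void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
				  unsigned int dma_sync_size, bool allow_direct)
{
	if (!allow_direct)
		allow_direct = page_pool_napi_local(pool);

	/* ... normal per-netmem recycling continues from here ... */
}

This check is what case 3 in the commit message trades its two flag comparisons for; the 15 vs. 20 cycles-per-element benchmark numbers quantify exactly this cost.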

7 files changed: +14 -42 lines changed


drivers/net/veth.c

Lines changed: 0 additions & 2 deletions
@@ -975,7 +975,6 @@ static int veth_poll(struct napi_struct *napi, int budget)
 
 	bq.count = 0;
 
-	xdp_set_return_frame_no_direct();
 	done = veth_xdp_rcv(rq, budget, &bq, &stats);
 
 	if (stats.xdp_redirect > 0)
@@ -994,7 +993,6 @@ static int veth_poll(struct napi_struct *napi, int budget)
 
 	if (stats.xdp_tx > 0)
 		veth_xdp_flush(rq, &bq);
-	xdp_clear_return_frame_no_direct();
 
 	return done;
 }

include/linux/filter.h

Lines changed: 0 additions & 22 deletions
@@ -764,7 +764,6 @@ struct bpf_nh_params {
 };
 
 /* flags for bpf_redirect_info kern_flags */
-#define BPF_RI_F_RF_NO_DIRECT	BIT(0)	/* no napi_direct on return_frame */
 #define BPF_RI_F_RI_INIT	BIT(1)
 #define BPF_RI_F_CPU_MAP_INIT	BIT(2)
 #define BPF_RI_F_DEV_MAP_INIT	BIT(3)
@@ -1163,27 +1162,6 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
 
-static inline bool xdp_return_frame_no_direct(void)
-{
-	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
-
-	return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT;
-}
-
-static inline void xdp_set_return_frame_no_direct(void)
-{
-	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
-
-	ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT;
-}
-
-static inline void xdp_clear_return_frame_no_direct(void)
-{
-	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
-
-	ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT;
-}
-
 static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
 				 unsigned int pktlen)
 {

include/net/xdp.h

Lines changed: 1 addition & 1 deletion
@@ -446,7 +446,7 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
 }
 
 void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
-		  bool napi_direct, struct xdp_buff *xdp);
+		  struct xdp_buff *xdp);
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);

kernel/bpf/cpumap.c

Lines changed: 0 additions & 2 deletions
@@ -253,7 +253,6 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
 
 	rcu_read_lock();
 	bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
-	xdp_set_return_frame_no_direct();
 
 	ret->xdp_n = cpu_map_bpf_prog_run_xdp(rcpu, frames, ret->xdp_n, stats);
 	if (unlikely(ret->skb_n))
@@ -263,7 +262,6 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
 	if (stats->redirect)
 		xdp_do_flush();
 
-	xdp_clear_return_frame_no_direct();
 	bpf_net_ctx_clear(bpf_net_ctx);
 	rcu_read_unlock();
 
net/bpf/test_run.c

Lines changed: 0 additions & 2 deletions
@@ -289,7 +289,6 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
 	local_bh_disable();
 	bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
 	ri = bpf_net_ctx_get_ri();
-	xdp_set_return_frame_no_direct();
 
 	for (i = 0; i < batch_sz; i++) {
 		page = page_pool_dev_alloc_pages(xdp->pp);
@@ -352,7 +351,6 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
 			err = ret;
 	}
 
-	xdp_clear_return_frame_no_direct();
 	bpf_net_ctx_clear(bpf_net_ctx);
 	local_bh_enable();
 	return err;

net/core/filter.c

Lines changed: 1 addition & 1 deletion
@@ -4195,7 +4195,7 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
 	}
 
 	if (release) {
-		__xdp_return(netmem, mem_type, false, zc_frag);
+		__xdp_return(netmem, mem_type, zc_frag);
 	} else {
 		if (!tail)
 			skb_frag_off_add(frag, shrink);

net/core/xdp.c

Lines changed: 12 additions & 12 deletions
@@ -431,18 +431,18 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool);
  * of xdp_frames/pages in those cases.
  */
 void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
-		  bool napi_direct, struct xdp_buff *xdp)
+		  struct xdp_buff *xdp)
 {
 	switch (mem_type) {
 	case MEM_TYPE_PAGE_POOL:
 		netmem = netmem_compound_head(netmem);
-		if (napi_direct && xdp_return_frame_no_direct())
-			napi_direct = false;
+
 		/* No need to check netmem_is_pp() as mem->type knows this a
 		 * page_pool page
+		 *
+		 * page_pool can detect direct recycle.
 		 */
-		page_pool_put_full_netmem(netmem_get_pp(netmem), netmem,
-					  napi_direct);
+		page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
 		break;
 	case MEM_TYPE_PAGE_SHARED:
 		page_frag_free(__netmem_address(netmem));
@@ -471,10 +471,10 @@ void xdp_return_frame(struct xdp_frame *xdpf)
 	sinfo = xdp_get_shared_info_from_frame(xdpf);
 	for (u32 i = 0; i < sinfo->nr_frags; i++)
 		__xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
-			     false, NULL);
+			     NULL);
 
 out:
-	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, false, NULL);
+	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
@@ -488,10 +488,10 @@ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 	sinfo = xdp_get_shared_info_from_frame(xdpf);
 	for (u32 i = 0; i < sinfo->nr_frags; i++)
 		__xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
-			     true, NULL);
+			     NULL);
 
 out:
-	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, true, NULL);
+	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
 
@@ -542,7 +542,7 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
  */
 void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp)
 {
-	__xdp_return(netmem, xdp->rxq->mem.type, true, NULL);
+	__xdp_return(netmem, xdp->rxq->mem.type, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frag);
 
@@ -556,10 +556,10 @@ void xdp_return_buff(struct xdp_buff *xdp)
 	sinfo = xdp_get_shared_info_from_buff(xdp);
 	for (u32 i = 0; i < sinfo->nr_frags; i++)
 		__xdp_return(skb_frag_netmem(&sinfo->frags[i]),
-			     xdp->rxq->mem.type, true, xdp);
+			     xdp->rxq->mem.type, xdp);
 
 out:
-	__xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, true, xdp);
+	__xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, xdp);
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
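One related usage note, not part of this patch: page_pool_napi_local() can only upgrade the now always-false direct flag if the pool knows which NAPI instance (or CPU, for per-CPU pools) it belongs to. A minimal, hypothetical sketch of how a driver binds its pool to its RX NAPI at creation time follows; struct example_rq and its members are illustrative only, while the page_pool_params fields and page_pool_create() follow the in-tree API:

#include <linux/err.h>
#include <linux/netdevice.h>
#include <linux/numa.h>
#include <net/page_pool/types.h>

/* Hypothetical driver RX queue; names are illustrative, not from any driver. */
struct example_rq {
	struct device		*dev;
	struct napi_struct	napi;
	struct page_pool	*page_pool;
};

static int example_rq_create_page_pool(struct example_rq *rq)
{
	struct page_pool_params pp_params = { 0 };

	pp_params.order		= 0;
	pp_params.pool_size	= 256;
	pp_params.nid		= NUMA_NO_NODE;
	pp_params.dev		= rq->dev;
	/* Binding the pool to the RX NAPI is what lets the page_pool
	 * detect safe direct recycling on its own.
	 */
	pp_params.napi		= &rq->napi;

	rq->page_pool = page_pool_create(&pp_params);
	return PTR_ERR_OR_ZERO(rq->page_pool);
}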
