diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index a3046142cb8e2..6d5c1e0b05a7c 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -975,7 +975,6 @@ static int veth_poll(struct napi_struct *napi, int budget)
 
 	bq.count = 0;
 
-	xdp_set_return_frame_no_direct();
 	done = veth_xdp_rcv(rq, budget, &bq, &stats);
 
 	if (stats.xdp_redirect > 0)
@@ -994,7 +993,6 @@ static int veth_poll(struct napi_struct *napi, int budget)
 
 	if (stats.xdp_tx > 0)
 		veth_xdp_flush(rq, &bq);
-	xdp_clear_return_frame_no_direct();
 
 	return done;
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f5c859b8131a3..877e40d81a4c0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -764,7 +764,6 @@ struct bpf_nh_params {
 };
 
 /* flags for bpf_redirect_info kern_flags */
-#define BPF_RI_F_RF_NO_DIRECT	BIT(0)	/* no napi_direct on return_frame */
 #define BPF_RI_F_RI_INIT	BIT(1)
 #define BPF_RI_F_CPU_MAP_INIT	BIT(2)
 #define BPF_RI_F_DEV_MAP_INIT	BIT(3)
@@ -1163,27 +1162,6 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
 
-static inline bool xdp_return_frame_no_direct(void)
-{
-	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
-
-	return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT;
-}
-
-static inline void xdp_set_return_frame_no_direct(void)
-{
-	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
-
-	ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT;
-}
-
-static inline void xdp_clear_return_frame_no_direct(void)
-{
-	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
-
-	ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT;
-}
-
 static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
 				 unsigned int pktlen)
 {
diff --git a/include/net/xdp.h b/include/net/xdp.h
index aa742f413c358..2a44d84a7611d 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -446,7 +446,7 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
 }
 
 void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
-		  bool napi_direct, struct xdp_buff *xdp);
+		  struct xdp_buff *xdp);
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 703e5df1f4ef9..3ece03dc36bdc 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -253,7 +253,6 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
 
 	rcu_read_lock();
 	bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
-	xdp_set_return_frame_no_direct();
 
 	ret->xdp_n = cpu_map_bpf_prog_run_xdp(rcpu, frames, ret->xdp_n, stats);
 	if (unlikely(ret->skb_n))
@@ -263,7 +262,6 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
 	if (stats->redirect)
 		xdp_do_flush();
 
-	xdp_clear_return_frame_no_direct();
 	bpf_net_ctx_clear(bpf_net_ctx);
 	rcu_read_unlock();
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 655efac6f1334..a349b976f819d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -289,7 +289,6 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
 	local_bh_disable();
 	bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
 	ri = bpf_net_ctx_get_ri();
-	xdp_set_return_frame_no_direct();
 
 	for (i = 0; i < batch_sz; i++) {
 		page = page_pool_dev_alloc_pages(xdp->pp);
@@ -352,7 +351,6 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
 		err = ret;
 	}
 
-	xdp_clear_return_frame_no_direct();
 	bpf_net_ctx_clear(bpf_net_ctx);
 	local_bh_enable();
 	return err;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1efec0d70d783..57c587e910ec3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4195,7 +4195,7 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
 	}
 
 	if (release) {
-		__xdp_return(netmem, mem_type, false, zc_frag);
+		__xdp_return(netmem, mem_type, zc_frag);
 	} else {
 		if (!tail)
 			skb_frag_off_add(frag, shrink);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9100e160113a9..cf8eab699d9a3 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -431,18 +431,18 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool);
  * of xdp_frames/pages in those cases.
  */
 void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
-		  bool napi_direct, struct xdp_buff *xdp)
+		  struct xdp_buff *xdp)
 {
 	switch (mem_type) {
 	case MEM_TYPE_PAGE_POOL:
 		netmem = netmem_compound_head(netmem);
-		if (napi_direct && xdp_return_frame_no_direct())
-			napi_direct = false;
 		/* No need to check netmem_is_pp() as mem->type knows this a
 		 * page_pool page
+		 *
+		 * page_pool can detect direct recycle.
 		 */
-		page_pool_put_full_netmem(netmem_get_pp(netmem), netmem,
-					  napi_direct);
+		page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
 		break;
 	case MEM_TYPE_PAGE_SHARED:
 		page_frag_free(__netmem_address(netmem));
@@ -471,10 +471,10 @@ void xdp_return_frame(struct xdp_frame *xdpf)
 
 	sinfo = xdp_get_shared_info_from_frame(xdpf);
 	for (u32 i = 0; i < sinfo->nr_frags; i++)
 		__xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
-			     false, NULL);
+			     NULL);
 
 out:
-	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, false, NULL);
+	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
@@ -488,10 +488,10 @@ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 
 	sinfo = xdp_get_shared_info_from_frame(xdpf);
 	for (u32 i = 0; i < sinfo->nr_frags; i++)
 		__xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
-			     true, NULL);
+			     NULL);
 
 out:
-	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, true, NULL);
+	__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
 
@@ -542,7 +542,7 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
  */
 void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp)
 {
-	__xdp_return(netmem, xdp->rxq->mem.type, true, NULL);
+	__xdp_return(netmem, xdp->rxq->mem.type, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frag);
 
@@ -556,10 +556,10 @@ void xdp_return_buff(struct xdp_buff *xdp)
 
 	sinfo = xdp_get_shared_info_from_buff(xdp);
 	for (u32 i = 0; i < sinfo->nr_frags; i++)
 		__xdp_return(skb_frag_netmem(&sinfo->frags[i]),
-			     xdp->rxq->mem.type, true, xdp);
+			     xdp->rxq->mem.type, xdp);
 
 out:
-	__xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, true, xdp);
+	__xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, xdp);
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
index cb6468adbda4d..84683c5478146 100644
--- a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include "time_bench.h"
@@ -16,6 +17,8 @@
 static int verbose = 1;
 #define MY_POOL_SIZE 1024
 
+DEFINE_MUTEX(wait_for_tasklet);
+
 /* Makes tests selectable. Useful for perf-record to analyze a single test.
  * Hint: Bash shells support writing binary number like: $((2#101010)
  *
@@ -31,6 +34,10 @@ enum benchmark_bit {
 	bit_run_bench_no_softirq01,
 	bit_run_bench_no_softirq02,
 	bit_run_bench_no_softirq03,
+	bit_run_bench_tasklet01,
+	bit_run_bench_tasklet02,
+	bit_run_bench_tasklet03,
+	bit_run_bench_tasklet04,
 };
 
 #define bit(b) (1 << (b))
@@ -120,7 +127,12 @@ static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
 	kfree(array);
 }
 
-enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+enum test_type {
+	type_fast_path,
+	type_napi_aware,
+	type_ptr_ring,
+	type_page_allocator,
+};
 
 /* Depends on compile optimizing this function */
 static int time_bench_page_pool(struct time_bench_record *rec, void *data,
@@ -132,6 +144,7 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 
 	struct page_pool *pp;
 	struct page *page;
+	struct napi_struct napi = {0};
 
 	struct page_pool_params pp_params = {
 		.order = 0,
@@ -141,6 +154,7 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 		.dev = NULL, /* Only use for DMA mapping */
 		.dma_dir = DMA_BIDIRECTIONAL,
 	};
+	struct page_pool_stats stats = {0};
 
 	pp = page_pool_create(&pp_params);
 	if (IS_ERR(pp)) {
@@ -155,6 +169,11 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 	else
 		pr_warn("%s(): Cannot use page_pool fast-path\n", func);
 
+	if (type == type_napi_aware) {
+		napi.list_owner = smp_processor_id();
+		page_pool_enable_direct_recycling(pp, &napi);
+	}
+
 	time_bench_start(rec);
 	/** Loop to measure **/
 	for (i = 0; i < rec->loops; i++) {
@@ -173,7 +192,13 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 			page_pool_recycle_direct(pp, page);
 
 		} else if (type == type_ptr_ring) {
-			/* Normal return path */
+			/* Normal return path, either direct or via ptr_ring */
+			page_pool_put_page(pp, page, -1, false);
+
+		} else if (type == type_napi_aware) {
+			/* NAPI-aware recycling: uses fast-path recycling if
+			 * possible.
+			 */
 			page_pool_put_page(pp, page, -1, false);
 
 		} else if (type == type_page_allocator) {
@@ -188,6 +213,14 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 		}
 	}
 	time_bench_stop(rec, loops_cnt);
+
+	if (type == type_napi_aware) {
+		page_pool_get_stats(pp, &stats);
+		if (stats.recycle_stats.cached < rec->loops)
+			pr_warn("%s(): NAPI-aware recycling wasn't used\n",
+				func);
+	}
+
 out:
 	page_pool_destroy(pp);
 	return loops_cnt;
 }
@@ -211,6 +244,54 @@ static int time_bench_page_pool03_slow(struct time_bench_record *rec,
 				       void *data)
 {
 	return time_bench_page_pool(rec, data, type_page_allocator, __func__);
 }
 
+static int time_bench_page_pool04_napi_aware(struct time_bench_record *rec,
+					     void *data)
+{
+	return time_bench_page_pool(rec, data, type_napi_aware, __func__);
+}
+
+/* Testing page_pool requires running under softirq.
+ *
+ * Running under a tasklet satisfy this, as tasklets are built on top of
+ * softirq.
+ */
+static void pp_tasklet_handler(struct tasklet_struct *t)
+{
+	uint32_t nr_loops = loops;
+
+	if (in_serving_softirq())
+		pr_warn("%s(): in_serving_softirq fast-path\n",
+			__func__); // True
+	else
+		pr_warn("%s(): Cannot use page_pool fast-path\n", __func__);
+
+	if (enabled(bit_run_bench_tasklet01))
+		time_bench_loop(nr_loops, 0, "tasklet_page_pool01_fast_path",
+				NULL, time_bench_page_pool01_fast_path);
+
+	if (enabled(bit_run_bench_tasklet02))
+		time_bench_loop(nr_loops, 0, "tasklet_page_pool02_ptr_ring",
+				NULL, time_bench_page_pool02_ptr_ring);
+
+	if (enabled(bit_run_bench_tasklet03))
+		time_bench_loop(nr_loops, 0, "tasklet_page_pool03_slow", NULL,
+				time_bench_page_pool03_slow);
+
+	if (enabled(bit_run_bench_tasklet04))
+		time_bench_loop(nr_loops, 0, "tasklet_page_pool04_napi_aware",
+				NULL, time_bench_page_pool04_napi_aware);
+
+	mutex_unlock(&wait_for_tasklet); /* Module __init waiting on unlock */
+}
+DECLARE_TASKLET_DISABLED(pp_tasklet, pp_tasklet_handler);
+
+static void run_tasklet_tests(void)
+{
+	tasklet_enable(&pp_tasklet);
+	/* "Async" schedule tasklet, which runs on the CPU that schedule it */
+	tasklet_schedule(&pp_tasklet);
+}
+
 static int run_benchmark_tests(void)
 {
 	uint32_t nr_loops = loops;
@@ -251,12 +332,19 @@ static int __init bench_page_pool_simple_module_init(void)
 
 	run_benchmark_tests();
 
+	mutex_lock(&wait_for_tasklet);
+	run_tasklet_tests();
+	/* Sleep on mutex, waiting for tasklet to release */
+	mutex_lock(&wait_for_tasklet);
+
 	return 0;
 }
 module_init(bench_page_pool_simple_module_init);
 
 static void __exit bench_page_pool_simple_module_exit(void)
 {
+	tasklet_kill(&pp_tasklet);
+
 	if (verbose)
 		pr_info("Unloaded\n");
 }
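For context, a minimal sketch of how a driver RX path would opt into the page_pool-side detection that replaces the removed napi_direct / BPF_RI_F_RF_NO_DIRECT machinery. This is not part of the patch above: my_rxq and my_rxq_open() are hypothetical names, and the pool_size / dma_dir values are only illustrative; page_pool_create() and page_pool_enable_direct_recycling() are the real APIs, the same ones exercised by the benchmark hunk above. After this change __xdp_return() always passes allow_direct = false, and page_pool itself decides whether the lockless per-CPU cache may be used, roughly: the page must be returned from softirq context on the CPU that owns the NAPI instance bound to the pool.

/* Hypothetical driver glue, for illustration only. */
#include <linux/netdevice.h>
#include <net/page_pool/helpers.h>

struct my_rxq {				/* made-up per-queue state */
	struct device *dev;
	struct page_pool *page_pool;
};

static int my_rxq_open(struct my_rxq *rxq, struct napi_struct *napi)
{
	struct page_pool_params pp_params = {
		.order		= 0,
		.pool_size	= 256,		/* illustrative size */
		.nid		= NUMA_NO_NODE,
		.dev		= rxq->dev,	/* only used for DMA mapping */
		.dma_dir	= DMA_FROM_DEVICE,
	};
	struct page_pool *pp;

	pp = page_pool_create(&pp_params);
	if (IS_ERR(pp))
		return PTR_ERR(pp);

	/* Bind the pool to this queue's NAPI instance. From here on,
	 * page_pool can recognise that a page is being freed from the
	 * pool's own NAPI/softirq context and recycle it directly, even
	 * when the caller (e.g. __xdp_return() after this patch) passes
	 * allow_direct = false.
	 */
	page_pool_enable_direct_recycling(pp, napi);

	rxq->page_pool = pp;
	return 0;
}

The benchmark above follows the same pattern with an on-stack napi_struct whose list_owner is set to the running CPU, which is what lets the type_napi_aware test land its returns in recycle_stats.cached without ever requesting direct recycling explicitly.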