99#include <linux/limits.h>
1010#include <linux/module.h>
1111#include <linux/mutex.h>
12+ #include <linux/netdevice.h>
1213#include <net/page_pool/helpers.h>
1314
1415#include "time_bench.h"
1516
1617static int verbose = 1 ;
1718#define MY_POOL_SIZE 1024
1819
/* Held by module init while the tasklet benchmarks run; released by the
 * tasklet handler once they are done. Only used inside this file, so keep
 * it out of the kernel's global symbol namespace.
 */
static DEFINE_MUTEX(wait_for_tasklet);
21+
1922/* Makes tests selectable. Useful for perf-record to analyze a single test.
2023 * Hint: Bash shells support writing binary numbers like: $((2#101010))
2124 *
@@ -31,6 +34,10 @@ enum benchmark_bit {
3134 bit_run_bench_no_softirq01 ,
3235 bit_run_bench_no_softirq02 ,
3336 bit_run_bench_no_softirq03 ,
37+ bit_run_bench_tasklet01 ,
38+ bit_run_bench_tasklet02 ,
39+ bit_run_bench_tasklet03 ,
40+ bit_run_bench_tasklet04 ,
3441};
3542
3643#define bit (b ) (1 << (b))
@@ -120,7 +127,12 @@ static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
120127 kfree (array );
121128}
122129
123- enum test_type { type_fast_path , type_ptr_ring , type_page_allocator };
/* Selects which page return path time_bench_page_pool() exercises. */
enum test_type {
	/* Direct recycle via page_pool_recycle_direct() (presumably; the
	 * branch condition is outside the visible code - confirm).
	 */
	type_fast_path,
	/* page_pool_put_page() after direct recycling has been enabled on
	 * the pool with a napi_struct owned by the current CPU.
	 */
	type_napi_aware,
	/* page_pool_put_page() without NAPI set up on the pool. */
	type_ptr_ring,
	/* Used by the "03_slow" benchmark; presumably returns pages all the
	 * way to the page allocator - confirm against the measuring loop.
	 */
	type_page_allocator,
};
124136
125137/* Depends on compile optimizing this function */
126138static int time_bench_page_pool (struct time_bench_record * rec , void * data ,
@@ -132,6 +144,7 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
132144
133145 struct page_pool * pp ;
134146 struct page * page ;
147+ struct napi_struct napi = {0 };
135148
136149 struct page_pool_params pp_params = {
137150 .order = 0 ,
@@ -141,6 +154,7 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
141154 .dev = NULL , /* Only use for DMA mapping */
142155 .dma_dir = DMA_BIDIRECTIONAL ,
143156 };
157+ struct page_pool_stats stats = {0 };
144158
145159 pp = page_pool_create (& pp_params );
146160 if (IS_ERR (pp )) {
@@ -155,6 +169,11 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
155169 else
156170 pr_warn ("%s(): Cannot use page_pool fast-path\n" , func );
157171
172+ if (type == type_napi_aware ) {
173+ napi .list_owner = smp_processor_id ();
174+ page_pool_enable_direct_recycling (pp , & napi );
175+ }
176+
158177 time_bench_start (rec );
159178 /** Loop to measure **/
160179 for (i = 0 ; i < rec -> loops ; i ++ ) {
@@ -173,7 +192,13 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
173192 page_pool_recycle_direct (pp , page );
174193
175194 } else if (type == type_ptr_ring ) {
176- /* Normal return path */
195+ /* Normal return path, either direct or via ptr_ring */
196+ page_pool_put_page (pp , page , -1 , false);
197+
198+ } else if (type == type_napi_aware ) {
199+ /* NAPI-aware recycling: uses fast-path recycling if
200+ * possible.
201+ */
177202 page_pool_put_page (pp , page , -1 , false);
178203
179204 } else if (type == type_page_allocator ) {
@@ -188,6 +213,14 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
188213 }
189214 }
190215 time_bench_stop (rec , loops_cnt );
216+
217+ if (type == type_napi_aware ) {
218+ page_pool_get_stats (pp , & stats );
219+ if (stats .recycle_stats .cached < rec -> loops )
220+ pr_warn ("%s(): NAPI-aware recycling wasn't used\n" ,
221+ func );
222+ }
223+
191224out :
192225 page_pool_destroy (pp );
193226 return loops_cnt ;
@@ -211,6 +244,54 @@ static int time_bench_page_pool03_slow(struct time_bench_record *rec,
211244 return time_bench_page_pool (rec , data , type_page_allocator , __func__ );
212245}
213246
247+ static int time_bench_page_pool04_napi_aware (struct time_bench_record * rec ,
248+ void * data )
249+ {
250+ return time_bench_page_pool (rec , data , type_napi_aware , __func__ );
251+ }
252+
253+ /* Testing page_pool requires running under softirq.
254+ *
255+ * Running under a tasklet satisfy this, as tasklets are built on top of
256+ * softirq.
257+ */
258+ static void pp_tasklet_handler (struct tasklet_struct * t )
259+ {
260+ uint32_t nr_loops = loops ;
261+
262+ if (in_serving_softirq ())
263+ pr_warn ("%s(): in_serving_softirq fast-path\n" ,
264+ __func__ ); // True
265+ else
266+ pr_warn ("%s(): Cannot use page_pool fast-path\n" , __func__ );
267+
268+ if (enabled (bit_run_bench_tasklet01 ))
269+ time_bench_loop (nr_loops , 0 , "tasklet_page_pool01_fast_path" ,
270+ NULL , time_bench_page_pool01_fast_path );
271+
272+ if (enabled (bit_run_bench_tasklet02 ))
273+ time_bench_loop (nr_loops , 0 , "tasklet_page_pool02_ptr_ring" ,
274+ NULL , time_bench_page_pool02_ptr_ring );
275+
276+ if (enabled (bit_run_bench_tasklet03 ))
277+ time_bench_loop (nr_loops , 0 , "tasklet_page_pool03_slow" , NULL ,
278+ time_bench_page_pool03_slow );
279+
280+ if (enabled (bit_run_bench_tasklet04 ))
281+ time_bench_loop (nr_loops , 0 , "tasklet_page_pool04_napi_aware" ,
282+ NULL , time_bench_page_pool04_napi_aware );
283+
284+ mutex_unlock (& wait_for_tasklet ); /* Module __init waiting on unlock */
285+ }
286+ DECLARE_TASKLET_DISABLED (pp_tasklet , pp_tasklet_handler );
287+
288+ static void run_tasklet_tests (void )
289+ {
290+ tasklet_enable (& pp_tasklet );
291+ /* "Async" schedule tasklet, which runs on the CPU that schedule it */
292+ tasklet_schedule (& pp_tasklet );
293+ }
294+
214295static int run_benchmark_tests (void )
215296{
216297 uint32_t nr_loops = loops ;
@@ -251,12 +332,19 @@ static int __init bench_page_pool_simple_module_init(void)
251332
252333 run_benchmark_tests ();
253334
335+ mutex_lock (& wait_for_tasklet );
336+ run_tasklet_tests ();
337+ /* Sleep on mutex, waiting for tasklet to release */
338+ mutex_lock (& wait_for_tasklet );
339+
254340 return 0 ;
255341}
256342module_init (bench_page_pool_simple_module_init );
257343
258344static void __exit bench_page_pool_simple_module_exit (void )
259345{
346+ tasklet_kill (& pp_tasklet );
347+
260348 if (verbose )
261349 pr_info ("Unloaded\n" );
262350}
0 commit comments