enum consts {
	ONE_SEC_IN_NS		= 1000000000,
	SHARED_DSQ		= 0,
+	HIGHPRI_DSQ		= 1,
+	HIGHPRI_WEIGHT		= 8668,		/* this is what -20 maps to */
};

char _license[] SEC("license") = "GPL";
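
The HIGHPRI_WEIGHT constant added above is the sched_ext task weight that nice -20 maps to. As a cross-check only (assuming the usual CFS nice-to-weight table, where nice -20 carries weight 88761 versus 1024 for nice 0, and sched_ext reporting nice 0 as p->scx.weight == 100), the value can be reproduced with this standalone snippet:

#include <stdio.h>

int main(void)
{
	/* assumed inputs, not taken from the patch: CFS weights for nice -20
	 * and nice 0, plus the sched_ext convention that nice 0 reads as 100 */
	const unsigned int cfs_weight_nice_m20 = 88761;
	const unsigned int cfs_weight_nice_0 = 1024;
	const unsigned int scx_weight_nice_0 = 100;

	/* prints 8668, matching HIGHPRI_WEIGHT */
	printf("%u\n", cfs_weight_nice_m20 * scx_weight_nice_0 / cfs_weight_nice_0);
	return 0;
}
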
@@ -36,10 +38,12 @@ const volatile u32 stall_user_nth;
const volatile u32 stall_kernel_nth;
const volatile u32 dsp_inf_loop_after;
const volatile u32 dsp_batch;
+const volatile bool highpri_boosting;
const volatile bool print_shared_dsq;
const volatile s32 disallow_tgid;
const volatile bool suppress_dump;

+u64 nr_highpri_queued;
u32 test_error_cnt;

UEI_DEFINE(uei);
@@ -95,6 +99,7 @@ static u64 core_sched_tail_seqs[5];
/* Per-task scheduling context */
struct task_ctx {
	bool	force_local;	/* Dispatch directly to local_dsq */
+	bool	highpri;
	u64	core_sched_seq;
};
@@ -122,6 +127,7 @@ struct {
/* Statistics */
u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued, nr_ddsp_from_enq;
u64 nr_core_sched_execed;
+u64 nr_expedited_local, nr_expedited_remote, nr_expedited_lost, nr_expedited_from_timer;
u32 cpuperf_min, cpuperf_avg, cpuperf_max;
u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;
@@ -140,17 +146,25 @@ static s32 pick_direct_dispatch_cpu(struct task_struct *p, s32 prev_cpu)
	return -1;
}

+static struct task_ctx *lookup_task_ctx(struct task_struct *p)
+{
+	struct task_ctx *tctx;
+
+	if (!(tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0))) {
+		scx_bpf_error("task_ctx lookup failed");
+		return NULL;
+	}
+	return tctx;
+}
+
s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	struct task_ctx *tctx;
	s32 cpu;

-	tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
-	if (!tctx) {
-		scx_bpf_error("task_ctx lookup failed");
+	if (!(tctx = lookup_task_ctx(p)))
		return -ESRCH;
-	}

	cpu = pick_direct_dispatch_cpu(p, prev_cpu);
@@ -197,11 +211,8 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
	if (test_error_cnt && !--test_error_cnt)
		scx_bpf_error("test triggering error");

-	tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
-	if (!tctx) {
-		scx_bpf_error("task_ctx lookup failed");
+	if (!(tctx = lookup_task_ctx(p)))
		return;
-	}

	/*
	 * All enqueued tasks must have their core_sched_seq updated for correct
@@ -255,6 +266,10 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
		return;
	}

+	if (highpri_boosting && p->scx.weight >= HIGHPRI_WEIGHT) {
+		tctx->highpri = true;
+		__sync_fetch_and_add(&nr_highpri_queued, 1);
+	}
	__sync_fetch_and_add(&nr_enqueued, 1);
}
@@ -271,13 +286,80 @@ void BPF_STRUCT_OPS(qmap_dequeue, struct task_struct *p, u64 deq_flags)

static void update_core_sched_head_seq(struct task_struct *p)
{
-	struct task_ctx *tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
	int idx = weight_to_idx(p->scx.weight);
+	struct task_ctx *tctx;

-	if (tctx)
+	if ((tctx = lookup_task_ctx(p)))
		core_sched_head_seqs[idx] = tctx->core_sched_seq;
-	else
-		scx_bpf_error("task_ctx lookup failed");
+}
+
+/*
+ * To demonstrate the use of scx_bpf_dispatch_from_dsq(), implement silly
+ * selective priority boosting mechanism by scanning SHARED_DSQ looking for
+ * highpri tasks, moving them to HIGHPRI_DSQ and then consuming them first. This
+ * makes minor difference only when dsp_batch is larger than 1.
+ *
+ * scx_bpf_dispatch[_vtime]_from_dsq() are allowed both from ops.dispatch() and
+ * non-rq-lock holding BPF programs. As demonstration, this function is called
+ * from qmap_dispatch() and monitor_timerfn().
+ */
+static bool dispatch_highpri(bool from_timer)
+{
+	struct task_struct *p;
+	s32 this_cpu = bpf_get_smp_processor_id();
+
+	/* scan SHARED_DSQ and move highpri tasks to HIGHPRI_DSQ */
+	bpf_for_each(scx_dsq, p, SHARED_DSQ, 0) {
+		static u64 highpri_seq;
+		struct task_ctx *tctx;
+
+		if (!(tctx = lookup_task_ctx(p)))
+			return false;
+
+		if (tctx->highpri) {
+			/* exercise the set_*() and vtime interface too */
+			scx_bpf_dispatch_from_dsq_set_slice(
+				BPF_FOR_EACH_ITER, slice_ns * 2);
+			scx_bpf_dispatch_from_dsq_set_vtime(
+				BPF_FOR_EACH_ITER, highpri_seq++);
+			scx_bpf_dispatch_vtime_from_dsq(
+				BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0);
+		}
+	}
+
+	/*
+	 * Scan HIGHPRI_DSQ and dispatch until a task that can run on this CPU
+	 * is found.
+	 */
+	bpf_for_each(scx_dsq, p, HIGHPRI_DSQ, 0) {
+		bool dispatched = false;
+		s32 cpu;
+
+		if (bpf_cpumask_test_cpu(this_cpu, p->cpus_ptr))
+			cpu = this_cpu;
+		else
+			cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0);
+
+		if (scx_bpf_dispatch_from_dsq(BPF_FOR_EACH_ITER, p,
+					      SCX_DSQ_LOCAL_ON | cpu,
+					      SCX_ENQ_PREEMPT)) {
+			if (cpu == this_cpu) {
+				dispatched = true;
+				__sync_fetch_and_add(&nr_expedited_local, 1);
+			} else {
+				__sync_fetch_and_add(&nr_expedited_remote, 1);
+			}
+			if (from_timer)
+				__sync_fetch_and_add(&nr_expedited_from_timer, 1);
+		} else {
+			__sync_fetch_and_add(&nr_expedited_lost, 1);
+		}
+
+		if (dispatched)
+			return true;
+	}
+
+	return false;
}

void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
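
Stripped of the stats, slice, and vtime handling, the core pattern that dispatch_highpri() exercises is: iterate a DSQ with bpf_for_each(scx_dsq, ...) and move selected tasks out of it with scx_bpf_dispatch_from_dsq() while the iteration is still in flight. A minimal sketch reusing names from this patch (illustration only, not additional code from the commit):

static void expedite_sketch(void)
{
	struct task_struct *p;

	/* walk SHARED_DSQ; tasks flagged highpri at enqueue time are moved
	 * to HIGHPRI_DSQ from inside the iteration */
	bpf_for_each(scx_dsq, p, SHARED_DSQ, 0) {
		struct task_ctx *tctx = lookup_task_ctx(p);

		if (tctx && tctx->highpri)
			scx_bpf_dispatch_from_dsq(BPF_FOR_EACH_ITER, p,
						  HIGHPRI_DSQ, 0);
	}
}
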
@@ -289,7 +371,10 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
	void *fifo;
	s32 i, pid;

-	if (scx_bpf_consume(SHARED_DSQ))
+	if (dispatch_highpri(false))
+		return;
+
+	if (!nr_highpri_queued && scx_bpf_consume(SHARED_DSQ))
		return;

	if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) {
@@ -326,20 +411,34 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)

	/* Dispatch or advance. */
	bpf_repeat(BPF_MAX_LOOPS) {
+		struct task_ctx *tctx;
+
		if (bpf_map_pop_elem(fifo, &pid))
			break;

		p = bpf_task_from_pid(pid);
		if (!p)
			continue;

+		if (!(tctx = lookup_task_ctx(p))) {
+			bpf_task_release(p);
+			return;
+		}
+
+		if (tctx->highpri)
+			__sync_fetch_and_sub(&nr_highpri_queued, 1);
+
		update_core_sched_head_seq(p);
		__sync_fetch_and_add(&nr_dispatched, 1);
+
		scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, 0);
		bpf_task_release(p);
+
		batch--;
		cpuc->dsp_cnt--;
		if (!batch || !scx_bpf_dispatch_nr_slots()) {
+			if (dispatch_highpri(false))
+				return;
			scx_bpf_consume(SHARED_DSQ);
			return;
		}
@@ -664,6 +763,10 @@ static void dump_shared_dsq(void)

static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
{
+	bpf_rcu_read_lock();
+	dispatch_highpri(true);
+	bpf_rcu_read_unlock();
+
	monitor_cpuperf();

	if (print_shared_dsq)
@@ -685,6 +788,10 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
	if (ret)
		return ret;

+	ret = scx_bpf_create_dsq(HIGHPRI_DSQ, -1);
+	if (ret)
+		return ret;
+
	timer = bpf_map_lookup_elem(&monitor_timer, &key);
	if (!timer)
		return -ESRCH;
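
Nothing in this diff touches the userspace loader, but the new knob and counters are ordinary BPF globals. Assuming the usual libbpf skeleton generated for this scheduler (taken here to be struct scx_qmap from scx_qmap.bpf.skel.h; both names are assumptions, since the loader side is not shown), enabling the boost and reading the new stats would look roughly like:

#include <stdio.h>
#include <stdbool.h>
#include "scx_qmap.bpf.skel.h"	/* assumed skeleton header name */

/* hypothetical helpers; the real scx_qmap loader changes are not in this diff */
static void enable_boost(struct scx_qmap *skel)
{
	/* const volatile globals live in .rodata and must be set before load */
	skel->rodata->highpri_boosting = true;
}

static void print_expedite_stats(struct scx_qmap *skel)
{
	/* plain globals such as the nr_expedited_* counters live in .bss */
	printf("expedited: local=%llu remote=%llu lost=%llu from_timer=%llu\n",
	       (unsigned long long)skel->bss->nr_expedited_local,
	       (unsigned long long)skel->bss->nr_expedited_remote,
	       (unsigned long long)skel->bss->nr_expedited_lost,
	       (unsigned long long)skel->bss->nr_expedited_from_timer);
}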