@@ -925,8 +925,15 @@ static unsigned long __percpu *scx_kick_cpus_pnt_seqs;
925925 */
926926static DEFINE_PER_CPU (struct task_struct * , direct_dispatch_task ) ;
927927
928- /* dispatch queues */
929- static struct scx_dispatch_q __cacheline_aligned_in_smp scx_dsq_global ;
928+ /*
929+ * Dispatch queues.
930+ *
931+ * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability. This is
932+ * to avoid live-locking in bypass mode where all tasks are dispatched to
933+ * %SCX_DSQ_GLOBAL and all CPUs consume from it. If per-node split isn't
934+ * sufficient, it can be further split.
935+ */
936+ static struct scx_dispatch_q * * global_dsqs ;
930937
931938static const struct rhashtable_params dsq_hash_params = {
932939 .key_len = 8 ,
@@ -1029,6 +1036,11 @@ static bool u32_before(u32 a, u32 b)
10291036 return (s32 )(a - b ) < 0 ;
10301037}
10311038
/*
 * find_global_dsq - Pick the per-node global DSQ to use for task @p.
 *
 * Returns the global_dsqs[] entry indexed by the node of the CPU that
 * task_cpu(@p) reports.
 *
 * NOTE(review): global_dsqs is dereferenced without a NULL check. The only
 * visible allocation is in scx_ops_enable(); confirm no caller can reach this
 * helper before that allocation has succeeded.
 */
1039+ static struct scx_dispatch_q * find_global_dsq (struct task_struct * p )
1040+ {
1041+ return global_dsqs [cpu_to_node (task_cpu (p ))];
1042+ }
1043+
10321044static struct scx_dispatch_q * find_user_dsq (u64 dsq_id )
10331045{
10341046 return rhashtable_lookup_fast (& dsq_hash , & dsq_id , dsq_hash_params );
@@ -1642,7 +1654,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p,
16421654 scx_ops_error ("attempting to dispatch to a destroyed dsq" );
16431655 /* fall back to the global dsq */
16441656 raw_spin_unlock (& dsq -> lock );
1645- dsq = & scx_dsq_global ;
1657+ dsq = find_global_dsq ( p ) ;
16461658 raw_spin_lock (& dsq -> lock );
16471659 }
16481660 }
@@ -1820,20 +1832,20 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id,
18201832 s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK ;
18211833
18221834 if (!ops_cpu_valid (cpu , "in SCX_DSQ_LOCAL_ON dispatch verdict" ))
1823- return & scx_dsq_global ;
1835+ return find_global_dsq ( p ) ;
18241836
18251837 return & cpu_rq (cpu )-> scx .local_dsq ;
18261838 }
18271839
18281840 if (dsq_id == SCX_DSQ_GLOBAL )
1829- dsq = & scx_dsq_global ;
1841+ dsq = find_global_dsq ( p ) ;
18301842 else
18311843 dsq = find_user_dsq (dsq_id );
18321844
18331845 if (unlikely (!dsq )) {
18341846 scx_ops_error ("non-existent DSQ 0x%llx for %s[%d]" ,
18351847 dsq_id , p -> comm , p -> pid );
1836- return & scx_dsq_global ;
1848+ return find_global_dsq ( p ) ;
18371849 }
18381850
18391851 return dsq ;
@@ -2005,7 +2017,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
20052017global :
20062018 touch_core_sched (rq , p ); /* see the comment in local: */
20072019 p -> scx .slice = SCX_SLICE_DFL ;
2008- dispatch_enqueue (& scx_dsq_global , p , enq_flags );
2020+ dispatch_enqueue (find_global_dsq ( p ) , p , enq_flags );
20092021}
20102022
20112023static bool task_runnable (const struct task_struct * p )
@@ -2391,6 +2403,13 @@ static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq)
23912403 return false;
23922404}
23932405
/*
 * consume_global_dsq - Try to consume from the global DSQ of @rq's node.
 *
 * Looks up the node of @rq's CPU and hands the matching global_dsqs[] entry
 * to consume_dispatch_q(), returning that call's result. Only the global DSQ
 * of @rq's own node is tried; global DSQs of other nodes are not scanned here.
 */
2406+ static bool consume_global_dsq (struct rq * rq )
2407+ {
2408+ int node = cpu_to_node (cpu_of (rq ));
2409+
2410+ return consume_dispatch_q (rq , global_dsqs [node ]);
2411+ }
2412+
23942413/**
23952414 * dispatch_to_local_dsq - Dispatch a task to a local dsq
23962415 * @rq: current rq which is locked
@@ -2424,7 +2443,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
24242443
24252444#ifdef CONFIG_SMP
24262445 if (unlikely (!task_can_run_on_remote_rq (p , dst_rq , true))) {
2427- dispatch_enqueue (& scx_dsq_global , p , enq_flags | SCX_ENQ_CLEAR_OPSS );
2446+ dispatch_enqueue (find_global_dsq (p ), p ,
2447+ enq_flags | SCX_ENQ_CLEAR_OPSS );
24282448 return ;
24292449 }
24302450
@@ -2624,7 +2644,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
26242644 if (rq -> scx .local_dsq .nr )
26252645 goto has_tasks ;
26262646
2627- if (consume_dispatch_q (rq , & scx_dsq_global ))
2647+ if (consume_global_dsq (rq ))
26282648 goto has_tasks ;
26292649
26302650 if (!SCX_HAS_OP (dispatch ) || scx_rq_bypassing (rq ) || !scx_rq_online (rq ))
@@ -2649,7 +2669,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
26492669
26502670 if (rq -> scx .local_dsq .nr )
26512671 goto has_tasks ;
2652- if (consume_dispatch_q (rq , & scx_dsq_global ))
2672+ if (consume_global_dsq (rq ))
26532673 goto has_tasks ;
26542674
26552675 /*
@@ -4924,7 +4944,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
49244944 struct scx_task_iter sti ;
49254945 struct task_struct * p ;
49264946 unsigned long timeout ;
4927- int i , cpu , ret ;
4947+ int i , cpu , node , ret ;
49284948
49294949 if (!cpumask_equal (housekeeping_cpumask (HK_TYPE_DOMAIN ),
49304950 cpu_possible_mask )) {
@@ -4943,6 +4963,34 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
49434963 }
49444964 }
49454965
4966+ if (!global_dsqs ) {
4967+ struct scx_dispatch_q * * dsqs ;
4968+
4969+ dsqs = kcalloc (nr_node_ids , sizeof (dsqs [0 ]), GFP_KERNEL );
4970+ if (!dsqs ) {
4971+ ret = - ENOMEM ;
4972+ goto err_unlock ;
4973+ }
4974+
4975+ for_each_node_state (node , N_POSSIBLE ) {
4976+ struct scx_dispatch_q * dsq ;
4977+
4978+ dsq = kzalloc_node (sizeof (* dsq ), GFP_KERNEL , node );
4979+ if (!dsq ) {
4980+ for_each_node_state (node , N_POSSIBLE )
4981+ kfree (dsqs [node ]);
4982+ kfree (dsqs );
4983+ ret = - ENOMEM ;
4984+ goto err_unlock ;
4985+ }
4986+
4987+ init_dsq (dsq , SCX_DSQ_GLOBAL );
4988+ dsqs [node ] = dsq ;
4989+ }
4990+
4991+ global_dsqs = dsqs ;
4992+ }
4993+
49464994 if (scx_ops_enable_state () != SCX_OPS_DISABLED ) {
49474995 ret = - EBUSY ;
49484996 goto err_unlock ;
@@ -5777,7 +5825,6 @@ void __init init_sched_ext_class(void)
57775825 SCX_TG_ONLINE );
57785826
57795827 BUG_ON (rhashtable_init (& dsq_hash , & dsq_hash_params ));
5780- init_dsq (& scx_dsq_global , SCX_DSQ_GLOBAL );
57815828#ifdef CONFIG_SMP
57825829 BUG_ON (!alloc_cpumask_var (& idle_masks .cpu , GFP_KERNEL ));
57835830 BUG_ON (!alloc_cpumask_var (& idle_masks .smt , GFP_KERNEL ));
@@ -6053,7 +6100,7 @@ static bool scx_dispatch_from_dsq(struct bpf_iter_scx_dsq_kern *kit,
60536100 if (dst_dsq -> id == SCX_DSQ_LOCAL ) {
60546101 dst_rq = container_of (dst_dsq , struct rq , scx .local_dsq );
60556102 if (!task_can_run_on_remote_rq (p , dst_rq , true)) {
6056- dst_dsq = & scx_dsq_global ;
6103+ dst_dsq = find_global_dsq ( p ) ;
60576104 dst_rq = src_rq ;
60586105 }
60596106 } else {
0 commit comments