Skip to content

Commit 6a68cec

Browse files
committed
Merge tag 'sched_ext-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext updates from Tejun Heo:

 - Add support for cgroup "cpu.max" interface

 - Code organization cleanup so that ext_idle.c doesn't depend on the source-file-inclusion build method of sched/

 - Drop UP paths in accordance with sched core changes

 - Documentation and other misc changes

* tag 'sched_ext-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Fix scx_bpf_reenqueue_local() reference
  sched_ext: Drop kfuncs marked for removal in 6.15
  sched_ext, rcu: Eject BPF scheduler on RCU CPU stall panic
  kernel/sched/ext.c: fix typo "occured" -> "occurred" in comments
  sched_ext: Add support for cgroup bandwidth control interface
  sched_ext, sched/core: Factor out struct scx_task_group
  sched_ext: Return NULL in llc_span
  sched_ext: Always use SMP versions in kernel/sched/ext_idle.h
  sched_ext: Always use SMP versions in kernel/sched/ext_idle.c
  sched_ext: Always use SMP versions in kernel/sched/ext.h
  sched_ext: Always use SMP versions in kernel/sched/ext.c
  sched_ext: Documentation: Clarify time slice handling in task lifecycle
  sched_ext: Make scx_locked_rq() inline
  sched_ext: Make scx_rq_bypassing() inline
  sched_ext: idle: Make local functions static in ext_idle.c
  sched_ext: idle: Remove unnecessary ifdef in scx_bpf_cpu_node()
2 parents 6aee5ae + ae96bba commit 6a68cec

File tree

12 files changed

+241
-198
lines changed

12 files changed

+241
-198
lines changed

Documentation/scheduler/sched-ext.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -313,16 +313,21 @@ by a sched_ext scheduler:
313313
ops.runnable(); /* Task becomes ready to run */
314314
315315
while (task is runnable) {
316-
if (task is not in a DSQ) {
316+
if (task is not in a DSQ && task->scx.slice == 0) {
317317
ops.enqueue(); /* Task can be added to a DSQ */
318318
319-
/* A CPU becomes available */
319+
/* Any usable CPU becomes available */
320320
321321
ops.dispatch(); /* Task is moved to a local DSQ */
322322
}
323323
ops.running(); /* Task starts running on its assigned CPU */
324-
ops.tick(); /* Called every 1/HZ seconds */
324+
while (task->scx.slice > 0 && task is runnable)
325+
ops.tick(); /* Called every 1/HZ seconds */
325326
ops.stopping(); /* Task stops running (time slice expires or wait) */
327+
328+
/* Task's CPU becomes available */
329+
330+
ops.dispatch(); /* task->scx.slice can be refilled */
326331
}
327332
328333
ops.quiescent(); /* Task releases its assigned CPU (wait) */

include/linux/sched/ext.h

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ struct sched_ext_entity {
164164

165165
/*
166166
* Runtime budget in nsecs. This is usually set through
167-
* scx_bpf_dispatch() but can also be modified directly by the BPF
167+
* scx_bpf_dsq_insert() but can also be modified directly by the BPF
168168
* scheduler. Automatically decreased by SCX as the task executes. On
169169
* depletion, a scheduling event is triggered.
170170
*
@@ -176,10 +176,10 @@ struct sched_ext_entity {
176176

177177
/*
178178
* Used to order tasks when dispatching to the vtime-ordered priority
179-
* queue of a dsq. This is usually set through scx_bpf_dispatch_vtime()
180-
* but can also be modified directly by the BPF scheduler. Modifying it
181-
* while a task is queued on a dsq may mangle the ordering and is not
182-
* recommended.
179+
* queue of a dsq. This is usually set through
180+
* scx_bpf_dsq_insert_vtime() but can also be modified directly by the
181+
* BPF scheduler. Modifying it while a task is queued on a dsq may
182+
* mangle the ordering and is not recommended.
183183
*/
184184
u64 dsq_vtime;
185185

@@ -206,12 +206,25 @@ struct sched_ext_entity {
206206
void sched_ext_free(struct task_struct *p);
207207
void print_scx_info(const char *log_lvl, struct task_struct *p);
208208
void scx_softlockup(u32 dur_s);
209+
bool scx_rcu_cpu_stall(void);
209210

210211
#else /* !CONFIG_SCHED_CLASS_EXT */
211212

212213
static inline void sched_ext_free(struct task_struct *p) {}
213214
static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
214215
static inline void scx_softlockup(u32 dur_s) {}
216+
static inline bool scx_rcu_cpu_stall(void) { return false; }
215217

216218
#endif /* CONFIG_SCHED_CLASS_EXT */
219+
220+
struct scx_task_group {
221+
#ifdef CONFIG_EXT_GROUP_SCHED
222+
u32 flags; /* SCX_TG_* */
223+
u32 weight;
224+
u64 bw_period_us;
225+
u64 bw_quota_us;
226+
u64 bw_burst_us;
227+
#endif
228+
};
229+
217230
#endif /* _LINUX_SCHED_EXT_H */

init/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,6 +1081,9 @@ if CGROUP_SCHED
10811081
config GROUP_SCHED_WEIGHT
10821082
def_bool n
10831083

1084+
config GROUP_SCHED_BANDWIDTH
1085+
def_bool n
1086+
10841087
config FAIR_GROUP_SCHED
10851088
bool "Group scheduling for SCHED_OTHER"
10861089
depends on CGROUP_SCHED
@@ -1090,6 +1093,7 @@ config FAIR_GROUP_SCHED
10901093
config CFS_BANDWIDTH
10911094
bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
10921095
depends on FAIR_GROUP_SCHED
1096+
select GROUP_SCHED_BANDWIDTH
10931097
default n
10941098
help
10951099
This option allows users to define CPU bandwidth rates (limits) for
@@ -1124,6 +1128,7 @@ config EXT_GROUP_SCHED
11241128
bool
11251129
depends on SCHED_CLASS_EXT && CGROUP_SCHED
11261130
select GROUP_SCHED_WEIGHT
1131+
select GROUP_SCHED_BANDWIDTH
11271132
default y
11281133

11291134
endif #CGROUP_SCHED

kernel/rcu/tree_stall.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,13 @@ static void panic_on_rcu_stall(void)
163163
{
164164
static int cpu_stall;
165165

166+
/*
167+
* Attempt to kick out the BPF scheduler if it's installed and defer
168+
* the panic to give the system a chance to recover.
169+
*/
170+
if (scx_rcu_cpu_stall())
171+
return;
172+
166173
if (++cpu_stall < sysctl_max_rcu_stall_to_panic)
167174
return;
168175

kernel/sched/core.c

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9815,7 +9815,9 @@ static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v)
98159815

98169816
return 0;
98179817
}
9818+
#endif /* CONFIG_CFS_BANDWIDTH */
98189819

9820+
#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
98199821
const u64 max_bw_quota_period_us = 1 * USEC_PER_SEC; /* 1s */
98209822
static const u64 min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */
98219823
/* More than 203 days if BW_SHIFT equals 20. */
@@ -9824,12 +9826,21 @@ static const u64 max_bw_runtime_us = MAX_BW;
98249826
static void tg_bandwidth(struct task_group *tg,
98259827
u64 *period_us_p, u64 *quota_us_p, u64 *burst_us_p)
98269828
{
9829+
#ifdef CONFIG_CFS_BANDWIDTH
98279830
if (period_us_p)
98289831
*period_us_p = tg_get_cfs_period(tg);
98299832
if (quota_us_p)
98309833
*quota_us_p = tg_get_cfs_quota(tg);
98319834
if (burst_us_p)
98329835
*burst_us_p = tg_get_cfs_burst(tg);
9836+
#else /* !CONFIG_CFS_BANDWIDTH */
9837+
if (period_us_p)
9838+
*period_us_p = tg->scx.bw_period_us;
9839+
if (quota_us_p)
9840+
*quota_us_p = tg->scx.bw_quota_us;
9841+
if (burst_us_p)
9842+
*burst_us_p = tg->scx.bw_burst_us;
9843+
#endif /* CONFIG_CFS_BANDWIDTH */
98339844
}
98349845

98359846
static u64 cpu_period_read_u64(struct cgroup_subsys_state *css,
@@ -9845,6 +9856,7 @@ static int tg_set_bandwidth(struct task_group *tg,
98459856
u64 period_us, u64 quota_us, u64 burst_us)
98469857
{
98479858
const u64 max_usec = U64_MAX / NSEC_PER_USEC;
9859+
int ret = 0;
98489860

98499861
if (tg == &root_task_group)
98509862
return -EINVAL;
@@ -9882,7 +9894,12 @@ static int tg_set_bandwidth(struct task_group *tg,
98829894
burst_us + quota_us > max_bw_runtime_us))
98839895
return -EINVAL;
98849896

9885-
return tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
9897+
#ifdef CONFIG_CFS_BANDWIDTH
9898+
ret = tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
9899+
#endif /* CONFIG_CFS_BANDWIDTH */
9900+
if (!ret)
9901+
scx_group_set_bandwidth(tg, period_us, quota_us, burst_us);
9902+
return ret;
98869903
}
98879904

98889905
static s64 cpu_quota_read_s64(struct cgroup_subsys_state *css,
@@ -9935,7 +9952,7 @@ static int cpu_burst_write_u64(struct cgroup_subsys_state *css,
99359952
tg_bandwidth(tg, &period_us, &quota_us, NULL);
99369953
return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
99379954
}
9938-
#endif /* CONFIG_CFS_BANDWIDTH */
9955+
#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */
99399956

99409957
#ifdef CONFIG_RT_GROUP_SCHED
99419958
static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
@@ -9995,7 +10012,7 @@ static struct cftype cpu_legacy_files[] = {
999510012
.write_s64 = cpu_idle_write_s64,
999610013
},
999710014
#endif
9998-
#ifdef CONFIG_CFS_BANDWIDTH
10015+
#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
999910016
{
1000010017
.name = "cfs_period_us",
1000110018
.read_u64 = cpu_period_read_u64,
@@ -10011,6 +10028,8 @@ static struct cftype cpu_legacy_files[] = {
1001110028
.read_u64 = cpu_burst_read_u64,
1001210029
.write_u64 = cpu_burst_write_u64,
1001310030
},
10031+
#endif
10032+
#ifdef CONFIG_CFS_BANDWIDTH
1001410033
{
1001510034
.name = "stat",
1001610035
.seq_show = cpu_cfs_stat_show,
@@ -10224,7 +10243,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf, u64 *period_us_p,
1022410243
return 0;
1022510244
}
1022610245

10227-
#ifdef CONFIG_CFS_BANDWIDTH
10246+
#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
1022810247
static int cpu_max_show(struct seq_file *sf, void *v)
1022910248
{
1023010249
struct task_group *tg = css_tg(seq_css(sf));
@@ -10271,7 +10290,7 @@ static struct cftype cpu_files[] = {
1027110290
.write_s64 = cpu_idle_write_s64,
1027210291
},
1027310292
#endif
10274-
#ifdef CONFIG_CFS_BANDWIDTH
10293+
#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
1027510294
{
1027610295
.name = "max",
1027710296
.flags = CFTYPE_NOT_ON_ROOT,

0 commit comments

Comments
 (0)