
Commit cc9877f

sched_ext: Improve error reporting during loading
When the BPF scheduler fails, ops.exit() allows rich error reporting through
scx_exit_info. Use the ops.exit() path consistently for all failures which can
be caused by the BPF scheduler:

- scx_ops_error() is called after ops.init() and ops.cgroup_init() failure to
  record error information.

- ops.init_task() failure now uses scx_ops_error() instead of pr_err().

- The err_disable path is updated to automatically trigger scx_ops_error() to
  cover cases where the error message hasn't already been generated, and to
  always return 0 indicating init success so that the error is reported
  through ops.exit().

Signed-off-by: Tejun Heo <[email protected]>
Cc: David Vernet <[email protected]>
Cc: Daniel Hodges <[email protected]>
Cc: Changwoo Min <[email protected]>
Cc: Andrea Righi <[email protected]>
Cc: Dan Schatzberg <[email protected]>
1 parent fcbc423 commit cc9877f
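For context, the consumer of this richer error information is the BPF scheduler's own ops.exit() callback. The following is a rough sketch of what that side can look like, modeled on the in-tree example schedulers rather than taken from this patch; the header path, the BPF_STRUCT_OPS convention, the names example_exit/example_ops, the exit_msg/exit_kind globals, and the buffer size are all assumptions for illustration.

#include <scx/common.bpf.h>	/* header layout as in the example schedulers; adjust to your tree */

char exit_msg[1024];	/* last message recorded by the kernel, e.g. via scx_ops_error() */
int exit_kind;		/* SCX_EXIT_* value; 0 (SCX_EXIT_NONE) means no exit recorded */

void BPF_STRUCT_OPS(example_exit, struct scx_exit_info *ei)
{
	/* snapshot the exit details so userspace can print them after unload */
	exit_kind = ei->kind;
	bpf_probe_read_kernel_str(exit_msg, sizeof(exit_msg), ei->msg);
}

SEC(".struct_ops.link")
struct sched_ext_ops example_ops = {
	/* other callbacks omitted for brevity */
	.exit	= (void *)example_exit,
	.name	= "example",
};

With this patch, the same callback also fires when ops.init() or another load-time step fails, so whatever it records becomes the primary channel for diagnosing a failed load.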

1 file changed (+19, -11)


kernel/sched/ext.c

Lines changed: 19 additions & 11 deletions
@@ -625,6 +625,10 @@ struct sched_ext_ops {
 	/**
 	 * exit - Clean up after the BPF scheduler
 	 * @info: Exit info
+	 *
+	 * ops.exit() is also called on ops.init() failure, which is a bit
+	 * unusual. This is to allow rich reporting through @info on how
+	 * ops.init() failed.
 	 */
 	void (*exit)(struct scx_exit_info *info);

@@ -4117,6 +4121,7 @@ static int scx_cgroup_init(void)
 				      css->cgroup, &args);
 		if (ret) {
 			css_put(css);
+			scx_ops_error("ops.cgroup_init() failed (%d)", ret);
 			return ret;
 		}
 		tg->scx_flags |= SCX_TG_INITED;

@@ -5041,6 +5046,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 		if (ret) {
 			ret = ops_sanitize_err("init", ret);
 			cpus_read_unlock();
+			scx_ops_error("ops.init() failed (%d)", ret);
 			goto err_disable;
 		}
 	}

@@ -5150,8 +5156,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 			spin_lock_irq(&scx_tasks_lock);
 			scx_task_iter_exit(&sti);
 			spin_unlock_irq(&scx_tasks_lock);
-			pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
-			       ret, p->comm, p->pid);
+			scx_ops_error("ops.init_task() failed (%d) for %s[%d]",
+				      ret, p->comm, p->pid);
 			goto err_disable_unlock_all;
 		}

@@ -5199,14 +5205,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	scx_ops_bypass(false);

-	/*
-	 * Returning an error code here would lose the recorded error
-	 * information. Exit indicating success so that the error is notified
-	 * through ops.exit() with all the details.
-	 */
 	if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
 		WARN_ON_ONCE(atomic_read(&scx_exit_kind) == SCX_EXIT_NONE);
-		ret = 0;
 		goto err_disable;
 	}

@@ -5241,10 +5241,18 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	scx_ops_bypass(false);
 err_disable:
 	mutex_unlock(&scx_ops_enable_mutex);
-	/* must be fully disabled before returning */
-	scx_ops_disable(SCX_EXIT_ERROR);
+	/*
+	 * Returning an error code here would not pass all the error information
+	 * to userspace. Record errno using scx_ops_error() for cases
+	 * scx_ops_error() wasn't already invoked and exit indicating success so
+	 * that the error is notified through ops.exit() with all the details.
+	 *
+	 * Flush scx_ops_disable_work to ensure that error is reported before
+	 * init completion.
+	 */
+	scx_ops_error("scx_ops_enable() failed (%d)", ret);
 	kthread_flush_work(&scx_ops_disable_work);
-	return ret;
+	return 0;
 }
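One practical consequence of the err_disable change above: since scx_ops_enable() now returns 0 even when loading fails, a successful struct_ops attach no longer guarantees that the scheduler is running, and a userspace loader should also check whether ops.exit() fired. A rough sketch of that handling, assuming a libbpf skeleton generated from the hypothetical BPF program sketched earlier; example.bpf.skel.h, struct example_bpf, attach_and_wait(), the example_ops map, and the exit_kind/exit_msg globals are illustrative names, not part of this patch.

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <bpf/libbpf.h>
#include "example.bpf.skel.h"	/* hypothetical skeleton for the BPF sketch above */

static int attach_and_wait(struct example_bpf *skel)
{
	struct bpf_link *link;

	/* With this patch, attaching can report success even if ops.init() failed ... */
	link = bpf_map__attach_struct_ops(skel->maps.example_ops);
	if (!link)
		return -errno;

	/* ... so watch for an ops.exit() record instead of trusting attach alone. */
	while (!skel->bss->exit_kind)
		sleep(1);

	fprintf(stderr, "scheduler exited: %s\n", skel->bss->exit_msg);
	bpf_link__destroy(link);
	return 0;
}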