scx_enable() calls scx_bypass(true) to initialize in bypass mode and then scx_bypass(false) on success to exit. If scx_enable() fails during task initialization - e.g. scx_cgroup_init() or scx_init_task() returns an error - it jumps to err_disable while bypass is still active. scx_disable_workfn() then calls scx_bypass(true/false) for its own bypass, leaving the bypass depth at 1 instead of 0. This causes the system to remain permanently in bypass mode after a failed scx_enable().
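For illustration, here is a minimal userspace model of the depth-counted toggle (bypass_depth and bypass() are hypothetical stand-ins; the in-kernel scx_bypass() keeps an equivalent internal depth and only enters/exits bypass mode on the 0<->1 transitions):

  #include <stdbool.h>
  #include <stdio.h>

  /* hypothetical stand-in for the kernel's internal bypass depth */
  static int bypass_depth;

  static void bypass(bool on)
  {
          bypass_depth += on ? 1 : -1;
          printf("bypass(%s) -> depth %d\n", on ? "true" : "false",
                 bypass_depth);
  }

  int main(void)
  {
          bypass(true);   /* scx_enable(): depth 0 -> 1 */
          /* task init fails, goto err_disable without bypass(false) */
          bypass(true);   /* scx_disable_workfn(): depth 1 -> 2 */
          bypass(false);  /* scx_disable_workfn(): depth 2 -> 1, not 0 */
          return 0;       /* bypass mode is never exited */
  }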
Failures after task initialization is complete - e.g. scx_tryset_enable_state() at the end - already call scx_bypass(false) before reaching the error path and are not affected. This only affects a subset of failure modes.
Fix it by tracking whether scx_enable() called scx_bypass(true) in a bool and having scx_disable_workfn() call an extra scx_bypass(false) to clear it. This is a temporary measure as the bypass depth will be moved into the sched instance, which will make this tracking unnecessary.
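With the flag in place, the same failing sequence nets out to zero (depth annotations and call ordering within scx_disable_workfn() are illustrative; see the diff below for the exact placement):

  scx_enable():          scx_bypass(true)   /* depth 0 -> 1, flag set */
  scx_disable_workfn():  scx_bypass(true)   /* depth 1 -> 2 */
  scx_disable_workfn():  scx_bypass(false)  /* depth 2 -> 1, its own pair */
  scx_disable_workfn():  scx_bypass(false)  /* depth 1 -> 0, flag cleared */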
Fixes: 8c2090c504e9 ("sched_ext: Initialize in bypass mode")
Cc: stable@vger.kernel.org # v6.12+
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/sched/ext.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -41,6 +41,13 @@ static bool scx_init_task_enabled;
 static bool scx_switching_all;
 DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
 
+/*
+ * Tracks whether scx_enable() called scx_bypass(true). Used to balance bypass
+ * depth on enable failure. Will be removed when bypass depth is moved into the
+ * sched instance.
+ */
+static bool scx_bypassed_for_enable;
+
 static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
 static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
@@ -4318,6 +4325,11 @@ static void scx_disable_workfn(struct kt
 	scx_dsp_max_batch = 0;
 	free_kick_syncs();
 
+	if (scx_bypassed_for_enable) {
+		scx_bypassed_for_enable = false;
+		scx_bypass(false);
+	}
+
 	mutex_unlock(&scx_enable_mutex);
 
 	WARN_ON_ONCE(scx_set_enable_state(SCX_DISABLED) != SCX_DISABLING);
@@ -4970,6 +4982,7 @@ static int scx_enable(struct sched_ext_o
 	 * Init in bypass mode to guarantee forward progress.
 	 */
 	scx_bypass(true);
+	scx_bypassed_for_enable = true;
 
 	for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
 		if (((void (**)(void))ops)[i])
@@ -5067,6 +5080,7 @@ static int scx_enable(struct sched_ext_o
 	scx_task_iter_stop(&sti);
 	percpu_up_write(&scx_fork_rwsem);
 
+	scx_bypassed_for_enable = false;
 	scx_bypass(false);
 
 	if (!scx_tryset_enable_state(SCX_ENABLED, SCX_ENABLING)) {

--
tejun
Hi,
Thanks for your patch.
FYI: kernel test robot notices the stable kernel rule is not satisfied.
The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html#opti...
Rule: The upstream commit ID must be specified with a separate line above the commit text.
Subject: [PATCH sched_ext/for-6.19-fixes] sched_ext: Fix bypass depth leak on scx_enable() failure
Link: https://lore.kernel.org/stable/286e6f7787a81239e1ce2989b52391ce%40kernel.org
Please ignore this mail if the patch is not relevant for upstream.
On Wed, Dec 10, 2025 at 05:13:38AM +0800, kernel test robot wrote:
> Hi,
>
> Thanks for your patch.
>
> FYI: kernel test robot notices the stable kernel rule is not satisfied.
>
> The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html#opti...
Something went wrong - this seemed correct :(
On Tue Dec 9, 2025 at 4:04 PM EST, Tejun Heo wrote:
> scx_enable() calls scx_bypass(true) to initialize in bypass mode and then scx_bypass(false) on success to exit. If scx_enable() fails during task initialization - e.g. scx_cgroup_init() or scx_init_task() returns an error - it jumps to err_disable while bypass is still active. scx_disable_workfn() then calls scx_bypass(true/false) for its own bypass, leaving the bypass depth at 1 instead of 0. This causes the system to remain permanently in bypass mode after a failed scx_enable().
>
> Failures after task initialization is complete - e.g. scx_tryset_enable_state() at the end - already call scx_bypass(false) before reaching the error path and are not affected. This only affects a subset of failure modes.
>
> Fix it by tracking whether scx_enable() called scx_bypass(true) in a bool and having scx_disable_workfn() call an extra scx_bypass(false) to clear it. This is a temporary measure as the bypass depth will be moved into the sched instance, which will make this tracking unnecessary.
>
> Fixes: 8c2090c504e9 ("sched_ext: Initialize in bypass mode")
> Cc: stable@vger.kernel.org # v6.12+
> Reported-by: Chris Mason <clm@meta.com>
> Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
> ---
>  kernel/sched/ext.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
>
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -41,6 +41,13 @@ static bool scx_init_task_enabled;
>  static bool scx_switching_all;
>  DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
>  
> +/*
> + * Tracks whether scx_enable() called scx_bypass(true). Used to balance bypass
> + * depth on enable failure. Will be removed when bypass depth is moved into the
> + * sched instance.
> + */
> +static bool scx_bypassed_for_enable;
> +
>  static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
>  static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
> @@ -4318,6 +4325,11 @@ static void scx_disable_workfn(struct kt
>  	scx_dsp_max_batch = 0;
>  	free_kick_syncs();
>  
> +	if (scx_bypassed_for_enable) {
> +		scx_bypassed_for_enable = false;
> +		scx_bypass(false);
> +	}
> +
>  	mutex_unlock(&scx_enable_mutex);
>  
>  	WARN_ON_ONCE(scx_set_enable_state(SCX_DISABLED) != SCX_DISABLING);
> @@ -4970,6 +4982,7 @@ static int scx_enable(struct sched_ext_o
>  	 * Init in bypass mode to guarantee forward progress.
>  	 */
>  	scx_bypass(true);
> +	scx_bypassed_for_enable = true;
>  
>  	for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
>  		if (((void (**)(void))ops)[i])
> @@ -5067,6 +5080,7 @@ static int scx_enable(struct sched_ext_o
>  	scx_task_iter_stop(&sti);
>  	percpu_up_write(&scx_fork_rwsem);
>  
> +	scx_bypassed_for_enable = false;
>  	scx_bypass(false);
>  
>  	if (!scx_tryset_enable_state(SCX_ENABLED, SCX_ENABLING)) {
>
> --
> tejun