6.6-stable review patch. If anyone has any objections, please let me know.
------------------
From: Tianchen Ding dtcccc@linux.alibaba.com
[ Upstream commit faa42d29419def58d3c3e5b14ad4037f0af3b496 ]
Consider the following cgroup:
root | ------------------------ | | normal_cgroup idle_cgroup | | SCHED_IDLE task_A SCHED_NORMAL task_B
According to the cgroup hierarchy, A should preempt B. But current check_preempt_wakeup_fair() treats cgroup se and task separately, so B will preempt A unexpectedly. Unify the wakeup logic by {c,p}se_is_idle only. This makes SCHED_IDLE of a task a relative policy that is effective only within its own cgroup, similar to the behavior of NICE.
Also fix se_is_idle() definition when !CONFIG_FAIR_GROUP_SCHED.
Fixes: 304000390f88 ("sched: Cgroup SCHED_IDLE support") Signed-off-by: Tianchen Ding dtcccc@linux.alibaba.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Reviewed-by: Josh Don joshdon@google.com Reviewed-by: Vincent Guittot vincent.guittot@linaro.org Link: https://lkml.kernel.org/r/20240626023505.1332596-1-dtcccc@linux.alibaba.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/sched/fair.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b2e1009e5706e..5fc0d9cc9d9d7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -533,7 +533,7 @@ static int cfs_rq_is_idle(struct cfs_rq *cfs_rq)
static int se_is_idle(struct sched_entity *se) { - return 0; + return task_has_idle_policy(task_of(se)); }
#endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -8209,16 +8209,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (test_tsk_need_resched(curr)) return;
- /* Idle tasks are by definition preempted by non-idle tasks. */ - if (unlikely(task_has_idle_policy(curr)) && - likely(!task_has_idle_policy(p))) - goto preempt; - - /* - * Batch and idle tasks do not preempt non-idle tasks (their preemption - * is driven by the tick): - */ - if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION)) + if (!sched_feat(WAKEUP_PREEMPTION)) return;
find_matching_se(&se, &pse); @@ -8228,7 +8219,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ pse_is_idle = se_is_idle(pse);
/* - * Preempt an idle group in favor of a non-idle group (and don't preempt + * Preempt an idle entity in favor of a non-idle entity (and don't preempt * in the inverse case). */ if (cse_is_idle && !pse_is_idle) @@ -8236,9 +8227,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (cse_is_idle != pse_is_idle) return;
+ /* + * BATCH and IDLE tasks do not preempt others. + */ + if (unlikely(p->policy != SCHED_NORMAL)) + return; + cfs_rq = cfs_rq_of(se); update_curr(cfs_rq); - /* * XXX pick_eevdf(cfs_rq) != se ? */