With commit 8f111bc357aa ("cpufreq/schedutil: Rewrite CPUFREQ_RT support") the schedutil governor uses rq->rt.rt_nr_running to detect whether an RT task is currently running on the CPU and to set the frequency to max if necessary. cpufreq_update_util() is called in enqueue/dequeue_top_rt_rq() but rq->rt.rt_nr_running has not been updated yet when dequeue_top_rt_rq() is called, so schedutil still considers that an RT task is running when the last task is dequeued. The update of rq->rt.rt_nr_running happens later in dequeue_rt_stack().
Fixes: 8f111bc357aa ('cpufreq/schedutil: Rewrite CPUFREQ_RT support') Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Vincent Guittot vincent.guittot@linaro.org --- - v2: - remove blank line
kernel/sched/rt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7aef6b4..a9f1119 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1000,9 +1000,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
sub_nr_running(rq, rt_rq->rt_nr_running); rt_rq->rt_queued = 0; - - /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_util(rq, 0); }
static void @@ -1288,6 +1285,9 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags) if (on_rt_rq(rt_se)) __dequeue_rt_entity(rt_se, flags); } + + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_util(rq_of_rt_rq(rt_rq_of_se(back)), 0); }
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
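For context, the ordering problem described in the changelog comes from the shape of the dequeue path in kernel/sched/rt.c around that time; the sketch below is simplified and abridged, not the verbatim kernel source.

/* Simplified sketch of the pre-patch dequeue path (abridged, not verbatim). */

static void dequeue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	sub_nr_running(rq, rt_rq->rt_nr_running);
	rt_rq->rt_queued = 0;

	/*
	 * Pre-patch: schedutil is kicked here, but rq->rt.rt_nr_running
	 * still accounts the task being dequeued, so the governor keeps
	 * requesting the maximum frequency.
	 */
	cpufreq_update_util(rq, 0);
}

static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct sched_rt_entity *back = NULL;

	for_each_sched_rt_entity(rt_se) {
		rt_se->back = back;
		back = rt_se;
	}

	dequeue_top_rt_rq(rt_rq_of_se(back));	/* cpufreq kicked here ... */

	for (rt_se = back; rt_se; rt_se = rt_se->back) {
		if (on_rt_rq(rt_se))
			__dequeue_rt_entity(rt_se, flags); /* ... rt_nr_running only drops here */
	}
}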
On Thu, May 17, 2018 at 09:16:43AM +0200, Vincent Guittot wrote:
With commit
8f111bc357aa ("cpufreq/schedutil: Rewrite CPUFREQ_RT support")
schedutil governor uses rq->rt.rt_nr_running to detect whether an RT task is currently running on the CPU and to set the frequency to max if necessary.
cpufreq_update_util() is called in enqueue/dequeue_top_rt_rq() but rq->rt.rt_nr_running has not been updated yet when dequeue_top_rt_rq() is called, so schedutil still considers that an RT task is running when the last task is dequeued. The update of rq->rt.rt_nr_running happens later in dequeue_rt_stack().
Fixes: 8f111bc357aa ('cpufreq/schedutil: Rewrite CPUFREQ_RT support') Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Vincent Guittot vincent.guittot@linaro.org
- v2:
- remove blank line
kernel/sched/rt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7aef6b4..a9f1119 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1000,9 +1000,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq) sub_nr_running(rq, rt_rq->rt_nr_running); rt_rq->rt_queued = 0;
- /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
- cpufreq_update_util(rq, 0);
} static void @@ -1288,6 +1285,9 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags) if (on_rt_rq(rt_se)) __dequeue_rt_entity(rt_se, flags); }
+ /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+ cpufreq_update_util(rq_of_rt_rq(rt_rq_of_se(back)), 0);
}
Hurm.. I think this is also wrong. See how dequeue_rt_stack() is also called from the enqueue path. Also, nothing calls cpufreq_update_util() on the throttle path now.
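The enqueue path referred to here looks roughly as follows; this is a simplified, abridged sketch of that era's kernel/sched/rt.c, not the verbatim source. Both entity paths start by tearing down the whole stack with dequeue_rt_stack(), so a kick placed there also fires on enqueue.

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se, flags);		/* also reached on enqueue */
	for_each_sched_rt_entity(rt_se)
		__enqueue_rt_entity(rt_se, flags);
	enqueue_top_rt_rq(&rq->rt);
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se, flags);
	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = group_rt_rq(rt_se);

		if (rt_rq && rt_rq->rt_nr_running)
			__enqueue_rt_entity(rt_se, flags);
	}
	enqueue_top_rt_rq(&rq->rt);
}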
And talking about throttle; I think we wanted to check rt_queued in that case.
How about the below?
--- kernel/sched/cpufreq_schedutil.c | 2 +- kernel/sched/rt.c | 24 ++++++++++++++---------- kernel/sched/sched.h | 5 +++++ 3 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e13df951aca7..1751f9630e49 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -185,7 +185,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu) struct rq *rq = cpu_rq(sg_cpu->cpu); unsigned long util;
- if (rq->rt.rt_nr_running) { + if (rt_rq_is_runnable(&rq->rt)) { util = sg_cpu->max; } else { util = sg_cpu->util_dl; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7aef6b4e885a..a1108a7da777 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -979,8 +979,14 @@ static void update_curr_rt(struct rq *rq) if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { raw_spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_time += delta_exec; - if (sched_rt_runtime_exceeded(rt_rq)) + if (sched_rt_runtime_exceeded(rt_rq)) { + struct rq *rq = rq_of_rt_rq(rt_rq); + + if (&rq->rt == rt_rq) + cpufreq_update_util(rq, 0); + resched_curr(rq); + } raw_spin_unlock(&rt_rq->rt_runtime_lock); } } @@ -1000,9 +1006,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
sub_nr_running(rq, rt_rq->rt_nr_running); rt_rq->rt_queued = 0; - - /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_util(rq, 0); }
static void @@ -1019,9 +1022,6 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
add_nr_running(rq, rt_rq->rt_nr_running); rt_rq->rt_queued = 1; - - /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_util(rq, 0); }
#if defined CONFIG_SMP @@ -1330,6 +1330,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); + + cpufreq_update_util(rq, 0); }
static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -1340,6 +1342,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) dequeue_rt_entity(rt_se, flags);
dequeue_pushable_task(rq, p); + + cpufreq_update_util(rq, 0); }
/* @@ -1533,6 +1537,9 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct task_struct *p; struct rt_rq *rt_rq = &rq->rt;
+ if (!rt_rq->rt_queued) + return NULL; + if (need_pull_rt_task(rq, prev)) { /* * This is OK, because current is on_cpu, which avoids it being @@ -1560,9 +1567,6 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) if (prev->sched_class == &rt_sched_class) update_curr_rt(rq);
- if (!rt_rq->rt_queued) - return NULL; - put_prev_task(rq, prev);
p = _pick_next_task_rt(rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c9895d35c5f7..bec7f2eecaf3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -609,6 +609,11 @@ struct rt_rq { #endif };
+static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq) +{ + return rt_rq->rt_queued && rt_rq->rt_nr_running; +} + /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */
On Thu, May 17, 2018 at 11:05:30AM +0200, Peter Zijlstra wrote:
Hurm.. I think this is also wrong. See how dequeue_rt_stack() is also called from the enqueue path. Also, nothing calls cpufreq_update_util() on the throttle path now.
And talking about throttle; I think we wanted to check rt_queued in that case.
Bah, clearly you also want to update on unthrottle.. but I think there's more buggered wrt throttling.
On Thursday 17 May 2018 at 11:32:06 (+0200), Peter Zijlstra wrote:
On Thu, May 17, 2018 at 11:05:30AM +0200, Peter Zijlstra wrote:
Hurm.. I think this is also wrong. See how dequeue_rt_stack() is also called from the enqueue path. Also, nothing calls cpufreq_update_util() on the throttle path now.
Yes, I missed the point that rt entities are dequeued and then re-enqueued when a task is either enqueued or dequeued.
That also means that enqueue_top_rt_rq() is always called when a task is enqueued or dequeued, and also when groups are throttled or unthrottled. In fact, the only point where it's not called is when the root rt_rq is throttled. So I have prepared the patch below, which calls cpufreq_update_util() in enqueue_top_rt_rq() and also when we throttle the root rt_rq. According to the tests I have done, it seems to work for the enqueue/dequeue and throttle/unthrottle use cases.
I have re-used your rt_rq_is_runnable(), which takes rt_queued into account.
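To illustrate why the combined check matters, here is a hypothetical userspace toy model (not kernel code; the struct and main() are invented for illustration): when the root rt_rq is throttled it is dequeued as a group, so rt_queued goes to 0 while rt_nr_running can stay non-zero, and only the combined check lets schedutil stop requesting the maximum frequency.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the two rt_rq fields schedutil cares about. */
struct toy_rt_rq {
	unsigned int rt_nr_running;	/* RT tasks accounted on this rq */
	int rt_queued;			/* is the root rt_rq actually enqueued? */
};

static bool rt_rq_is_runnable(const struct toy_rt_rq *rt_rq)
{
	return rt_rq->rt_queued && rt_rq->rt_nr_running;
}

int main(void)
{
	/* Root rt_rq throttled: tasks still accounted, but dequeued as a group. */
	struct toy_rt_rq throttled = { .rt_nr_running = 2, .rt_queued = 0 };

	/* The old rt_nr_running-only check would keep requesting fmax. */
	printf("rt_nr_running check: %d\n", throttled.rt_nr_running != 0);
	/* The combined check lets the frequency drop while throttled. */
	printf("rt_rq_is_runnable(): %d\n", rt_rq_is_runnable(&throttled));
	return 0;
}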
--- kernel/sched/cpufreq_schedutil.c | 2 +- kernel/sched/rt.c | 16 ++++++++++------ kernel/sched/sched.h | 5 +++++ 3 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e13df95..1751f96 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -185,7 +185,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu) struct rq *rq = cpu_rq(sg_cpu->cpu); unsigned long util;
- if (rq->rt.rt_nr_running) { + if (rt_rq_is_runnable(&rq->rt)) { util = sg_cpu->max; } else { util = sg_cpu->util_dl; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7aef6b4..d6b9517 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
rt_se = rt_rq->tg->rt_se[cpu];
- if (!rt_se) + if (!rt_se) { dequeue_top_rt_rq(rt_rq); + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_util(rq_of_rt_rq(rt_rq), 0); + } else if (on_rt_rq(rt_se)) dequeue_rt_entity(rt_se, 0); } @@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq) sub_nr_running(rq, rt_rq->rt_nr_running); rt_rq->rt_queued = 0;
- /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_util(rq, 0); }
static void @@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
if (rt_rq->rt_queued) return; - if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) + + if (rt_rq_throttled(rt_rq)) return;
- add_nr_running(rq, rt_rq->rt_nr_running); - rt_rq->rt_queued = 1; + if (rt_rq->rt_nr_running) { + add_nr_running(rq, rt_rq->rt_nr_running); + rt_rq->rt_queued = 1; + }
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */ cpufreq_update_util(rq, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c9895d3..bbebcfe 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -609,6 +609,11 @@ struct rt_rq { #endif };
+static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq) +{ + return rt_rq->rt_queued && rt_rq->rt_nr_running; +} + /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */
Hi Peter,
On 17 May 2018 at 22:12, Vincent Guittot vincent.guittot@linaro.org wrote:
On Thursday 17 May 2018 at 11:32:06 (+0200), Peter Zijlstra wrote:
On Thu, May 17, 2018 at 11:05:30AM +0200, Peter Zijlstra wrote:
Hurm.. I think this is also wrong. See how dequeue_rt_stack() is also called from the enqueue path. Also, nothing calls cpufreq_update_util() on the throttle path now.
Yes, I missed the point that rt entities are dequeued and then re-enqueued when a task is either enqueued or dequeued.
That also means that enqueue_top_rt_rq() is always called when a task is enqueued or dequeued, and also when groups are throttled or unthrottled. In fact, the only point where it's not called is when the root rt_rq is throttled. So I have prepared the patch below, which calls cpufreq_update_util() in enqueue_top_rt_rq() and also when we throttle the root rt_rq. According to the tests I have done, it seems to work for the enqueue/dequeue and throttle/unthrottle use cases.
I have re-used your rt_rq_is_runnable(), which takes rt_queued into account.
What is the status of this problem?
The patch below conflicts with b29bbbbff0d6 ('sched/cpufreq: always consider blocked FAIR utilization'), which has been merged into tip/sched/core.
Do you want me to rebase it?
Regards, Vincent
kernel/sched/cpufreq_schedutil.c | 2 +- kernel/sched/rt.c | 16 ++++++++++------ kernel/sched/sched.h | 5 +++++ 3 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e13df95..1751f96 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -185,7 +185,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu) struct rq *rq = cpu_rq(sg_cpu->cpu); unsigned long util;
- if (rq->rt.rt_nr_running) { + if (rt_rq_is_runnable(&rq->rt)) { util = sg_cpu->max; } else { util = sg_cpu->util_dl; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7aef6b4..d6b9517 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
rt_se = rt_rq->tg->rt_se[cpu];
- if (!rt_se) + if (!rt_se) { dequeue_top_rt_rq(rt_rq); + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_util(rq_of_rt_rq(rt_rq), 0); + } else if (on_rt_rq(rt_se)) dequeue_rt_entity(rt_se, 0); } @@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq) sub_nr_running(rq, rt_rq->rt_nr_running); rt_rq->rt_queued = 0;
- /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_util(rq, 0); }
static void @@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
if (rt_rq->rt_queued) return; - if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) + + if (rt_rq_throttled(rt_rq)) return;
- add_nr_running(rq, rt_rq->rt_nr_running); - rt_rq->rt_queued = 1; + if (rt_rq->rt_nr_running) { + add_nr_running(rq, rt_rq->rt_nr_running); + rt_rq->rt_queued = 1; + }
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */ cpufreq_update_util(rq, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c9895d3..bbebcfe 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -609,6 +609,11 @@ struct rt_rq { #endif };
+static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq) +{ + return rt_rq->rt_queued && rt_rq->rt_nr_running; +} + /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */ -- 2.7.4
And talking about throttle; I think we wanted to check rt_queued in that case.
Bah, clearly you also want to update on unthrottle.. but I think there's more buggered wrt throttling.
With commit 8f111bc357aa ("cpufreq/schedutil: Rewrite CPUFREQ_RT support") the schedutil governor uses rq->rt.rt_nr_running to detect whether an RT task is currently running on the CPU and to set the frequency to max if necessary.
cpufreq_update_util() is called in enqueue/dequeue_top_rt_rq() but rq->rt.rt_nr_running has not been updated yet when dequeue_top_rt_rq() is called, so schedutil still considers that an RT task is running when the last task is dequeued. The update of rq->rt.rt_nr_running happens later in dequeue_rt_stack().
In fact, we can take advantage of the sequence that dequeues and then re-enqueues the rt entities when an rt task is enqueued or dequeued; as a result, enqueue_top_rt_rq() is always called when a task is enqueued or dequeued, and also when groups are throttled or unthrottled. The only place that does not use enqueue_top_rt_rq() is when the root rt_rq is throttled.
Fixes: 8f111bc357aa ('cpufreq/schedutil: Rewrite CPUFREQ_RT support') Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Vincent Guittot vincent.guittot@linaro.org --- kernel/sched/cpufreq_schedutil.c | 2 +- kernel/sched/rt.c | 16 ++++++++++------ kernel/sched/sched.h | 5 +++++ 3 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 3cde464..c907fde 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -192,7 +192,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu);
- if (rq->rt.rt_nr_running) + if (rt_rq_is_runnable(&rq->rt)) return sg_cpu->max;
/* diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 47556b0..5725670 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
rt_se = rt_rq->tg->rt_se[cpu];
- if (!rt_se) + if (!rt_se) { dequeue_top_rt_rq(rt_rq); + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_util(rq_of_rt_rq(rt_rq), 0); + } else if (on_rt_rq(rt_se)) dequeue_rt_entity(rt_se, 0); } @@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq) sub_nr_running(rq, rt_rq->rt_nr_running); rt_rq->rt_queued = 0;
- /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_util(rq, 0); }
static void @@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
if (rt_rq->rt_queued) return; - if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) + + if (rt_rq_throttled(rt_rq)) return;
- add_nr_running(rq, rt_rq->rt_nr_running); - rt_rq->rt_queued = 1; + if (rt_rq->rt_nr_running) { + add_nr_running(rq, rt_rq->rt_nr_running); + rt_rq->rt_queued = 1; + }
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */ cpufreq_update_util(rq, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6601baf..27ddec3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -609,6 +609,11 @@ struct rt_rq { #endif };
+static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq) +{ + return rt_rq->rt_queued && rt_rq->rt_nr_running; +} + /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */
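Summarising the paths discussed in this thread, the points where schedutil now gets kicked for RT after this patch are roughly the following (a sketch of the call flow, not verbatim kernel code):

/*
 * enqueue_task_rt()   -> enqueue_rt_entity() -> enqueue_top_rt_rq()
 *                                                -> cpufreq_update_util()
 * dequeue_task_rt()   -> dequeue_rt_entity() -> enqueue_top_rt_rq()
 *                                                -> cpufreq_update_util()
 *                        (the entity stack is dequeued and then re-enqueued,
 *                         so the kick now happens after rt_nr_running has
 *                         been updated)
 * group (un)throttle  -> sched_rt_rq_dequeue()/_enqueue() with a valid rt_se
 *                        -> dequeue/enqueue_rt_entity() -> enqueue_top_rt_rq()
 *                                                -> cpufreq_update_util()
 * root rt_rq throttle -> sched_rt_rq_dequeue() with !rt_se
 *                        -> dequeue_top_rt_rq() + explicit cpufreq_update_util()
 */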