With a lot of small tasks, the sched softirq is almost never raised when NO_HZ is enabled. In that case, load_balance() is mostly called in the CPU_NEWLY_IDLE mode, which does not update the cpu_power.

Add a next_update field which ensures a maximum update period even when the activity consists only of short-running tasks.
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 include/linux/sched.h |    1 +
 kernel/sched_fair.c   |   20 ++++++++++++++------
 2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 41d0237..8610921 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -901,6 +901,7 @@ struct sched_group_power {
 	 * single CPU.
 	 */
	unsigned int power, power_orig;
+	unsigned long next_update;
 };
 
 struct sched_group {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index bc8ee99..3961876 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2667,6 +2667,11 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
 	unsigned long power;
+	unsigned long interval;
+
+	interval = msecs_to_jiffies(sd->balance_interval);
+	interval = clamp(interval, 1UL, max_load_balance_interval);
+	sdg->sgp->next_update = jiffies + interval;
 
 	if (!child) {
 		update_cpu_power(sd, cpu);
@@ -2774,12 +2779,15 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	 * domains. In the newly idle case, we will allow all the cpu's
 	 * to do the newly idle load balance.
 	 */
-	if (idle != CPU_NEWLY_IDLE && local_group) {
-		if (balance_cpu != this_cpu) {
-			*balance = 0;
-			return;
-		}
-		update_group_power(sd, this_cpu);
+	if (local_group) {
+		if (idle != CPU_NEWLY_IDLE) {
+			if (balance_cpu != this_cpu) {
+				*balance = 0;
+				return;
+			}
+			update_group_power(sd, this_cpu);
+		} else if (time_after_eq(jiffies, group->sgp->next_update))
+			update_group_power(sd, this_cpu);
 	}
 
 	/* Adjust by relative CPU power of the group */
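
For illustration only (not part of the patch): the snippet below is a minimal user-space sketch of the throttling behaviour the next_update field provides. jiffies, HZ, time_after_eq(), msecs_to_jiffies() and update_group_power() are simplified stand-ins for the real kernel definitions, just to show how a newly-idle balance skips the cpu_power refresh until the deadline has passed.

/*
 * Illustrative sketch of the next_update throttle. All names below are
 * simplified stand-ins for the kernel's versions.
 */
#include <stdio.h>
#include <stdbool.h>

static unsigned long jiffies;		/* stand-in for the kernel tick counter */
#define HZ 100				/* assumed tick rate for this sketch */

/* wrap-safe "a >= b" comparison, modelled on include/linux/jiffies.h */
static bool time_after_eq(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;
}

static unsigned long msecs_to_jiffies(unsigned int ms)
{
	return (ms * HZ) / 1000;
}

struct sched_group_power {
	unsigned int power;
	unsigned long next_update;	/* field added by the patch */
};

/* simplified stand-in for update_group_power(): refresh and rearm the deadline */
static void update_group_power(struct sched_group_power *sgp,
			       unsigned int balance_interval_ms)
{
	sgp->next_update = jiffies + msecs_to_jiffies(balance_interval_ms);
	printf("tick %lu: cpu_power refreshed, next update at %lu\n",
	       jiffies, sgp->next_update);
}

int main(void)
{
	struct sched_group_power sgp = { .power = 1024, .next_update = 0 };

	/* pretend every tick is a CPU_NEWLY_IDLE balance on the local group */
	for (jiffies = 0; jiffies < 50; jiffies++) {
		if (time_after_eq(jiffies, sgp.next_update))
			update_group_power(&sgp, 100);	/* 100 ms interval */
		/* otherwise the newly-idle balance skips the update */
	}
	return 0;
}

With a 100 ms interval and HZ=100, the refresh fires every 10 ticks even though every iteration is a newly-idle balance, which is the periodic-update guarantee the patch adds.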