This patch drops the aggressive migration in load balancing by ignoring the statistic that counts failed balance attempts (sd->nr_balance_failed) when deciding whether to force an active balance.

On architectures built around asymmetric CPUs, e.g. ARM big.LITTLE, a task could be force-migrated even when there was no need for it. This caused the task to bounce between CPUs and prevented the system from utilizing all of its CPUs.
Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
---
 kernel/sched/fair.c | 79 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 63 insertions(+), 16 deletions(-)
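To illustrate the fitting test introduced below: a destination CPU is treated as having enough spare capacity when its capacity exceeds its current utilization plus half of the utilization that would be migrated to it. The following standalone sketch (not kernel code; the capacity and utilization figures are hypothetical values in the kernel's 0..1024 scale) shows the effect for a typical big.LITTLE pair:

	#include <stdbool.h>
	#include <stdio.h>

	/* Mirrors the check_cpu_spare_capacity() test from the patch. */
	static bool fits(unsigned int capacity, unsigned int util,
			 unsigned int needed)
	{
		return capacity > util + needed / 2;
	}

	int main(void)
	{
		/* big CPU (capacity 1024) at util 700, task worth 400: fits */
		printf("big:    %d\n", fits(1024, 700, 400));
		/* LITTLE CPU (capacity 430) at util 300, same task: no fit */
		printf("LITTLE: %d\n", fits(430, 300, 400));
		return 0;
	}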
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cea6df0949a8..8ac2ad9a5b8d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7612,13 +7612,22 @@ static inline int migrate_degrades_locality(struct task_struct *p,
 }
 #endif
 
+static inline bool check_cpu_spare_capacity(int cpu,
+					    unsigned int needed_spare_capacity)
+{
+	return (capacity_of(cpu) >
+		(cpu_util(cpu) + (needed_spare_capacity / 2)));
+
+}
+
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
-	int tsk_cache_hot;
+	int tsk_cache_hot = 0;
+	int ret = 0;
 
 	lockdep_assert_held(&env->src_rq->lock);
 
 	/*
@@ -7629,8 +7638,10 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	 * 3) running (obviously), or
 	 * 4) are cache-hot on their current CPU.
 	 */
-	if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
-		return 0;
+	if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) {
+		ret = 0;
+		goto out;
+	}
 
 	if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) {
 		int cpu;
@@ -7647,8 +7658,11 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 		 * Avoid computing new_dst_cpu for NEWLY_IDLE or if we have
 		 * already computed one in current iteration.
 		 */
-		if (env->idle == CPU_NEWLY_IDLE || (env->flags & LBF_DST_PINNED))
-			return 0;
+		if (env->idle == CPU_NEWLY_IDLE || (env->flags &
+						    LBF_DST_PINNED)) {
+			ret = 0;
+			goto out;
+		}
 
 		/* Prevent to re-select dst_cpu via env's cpus */
 		for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
@@ -7659,7 +7673,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 			}
 		}
 
-		return 0;
+		ret = 0;
+		goto out;
 	}
 
 	/* Record that we found atleast one task that could run on dst_cpu */
@@ -7667,7 +7682,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 
 	if (task_running(env->src_rq, p)) {
 		schedstat_inc(p->se.statistics.nr_failed_migrations_running);
-		return 0;
+		ret = 0;
+		goto out;
 	}
 
 	/*
@@ -7686,11 +7702,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
 			schedstat_inc(p->se.statistics.nr_forced_migrations);
 		}
-		return 1;
+		ret = 1;
+		goto out;
 	}
 
 	schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
-	return 0;
+out:
+	trace_sched_can_migrate_task(p->pid, ret, tsk_cache_hot, env->src_cpu,
+				     env->dst_cpu);
+	return ret;
 }
 
 /*
@@ -9085,9 +9105,36 @@ static struct rq *find_busiest_queue(struct lb_env *env,
  */
 #define MAX_PINNED_INTERVAL	512
 
+static inline int need_park_into_spare_capacity(struct lb_env *env)
+{
+	bool fits_in = check_cpu_spare_capacity(env->dst_cpu,
+						cpu_util(env->src_cpu));
+	int ret;
+
+	if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) &&
+	    env->src_rq->cfs.h_nr_running == 1 &&
+	    cpu_overutilized(env->src_cpu) &&
+	    !cpu_overutilized(env->dst_cpu) &&
+	    fits_in) {
+		ret = 1;
+	} else {
+		ret = 0;
+	}
+
+	trace_sched_migrate_capacity_comparison(env->src_cpu, env->dst_cpu,
+						capacity_of(env->src_cpu),
+						capacity_of(env->dst_cpu),
+						cpu_util(env->src_cpu),
+						cpu_util(env->dst_cpu), ret);
+
+	return ret;
+
+}
+
 static int need_active_balance(struct lb_env *env)
 {
 	struct sched_domain *sd = env->sd;
+	int need_balance = sd->nr_balance_failed > sd->cache_nice_tries + 2;
 
 	if (env->idle == CPU_NEWLY_IDLE) {
 
@@ -9114,14 +9161,11 @@ static int need_active_balance(struct lb_env *env)
 		return 1;
 	}
 
-	if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) &&
-	    env->src_rq->cfs.h_nr_running == 1 &&
-	    cpu_overutilized(env->src_cpu) &&
-	    !cpu_overutilized(env->dst_cpu)) {
-		return 1;
-	}
+	if (need_park_into_spare_capacity(env))
+		return 1;
 
-	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
+	trace_sched_need_active_balance(need_balance);
+	return 0;
 }
 
 static int active_load_balance_cpu_stop(void *data);
@@ -9345,6 +9389,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		if (need_active_balance(&env)) {
 			unsigned long flags;
+
 			raw_spin_lock_irqsave(&busiest->lock, flags);
 
 			/* don't kick the active_load_balance_cpu_stop,
 			 * if the curr task on busiest cpu can't be
@@ -9590,6 +9635,8 @@ static int active_load_balance_cpu_stop(void *data)
 	struct task_struct *p = NULL;
 	struct rq_flags rf;
 
+	trace_sched_active_lb_stop_cpu(busiest_cpu, target_cpu);
+
 	rq_lock_irq(busiest_rq, &rf);
 	/*
 	 * Between queueing the stop-work and running it is a hole in which
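
Note: the tracepoint definitions (trace_sched_can_migrate_task and friends) are not part of this diff and are presumably added elsewhere in the series. For reference, a minimal sketch of how sched_can_migrate_task could be declared in include/trace/events/sched.h; only the argument list is taken from the call site above, the field layout and printk format are assumptions:

	/* Sketch only: fields mirror the call in can_migrate_task(). */
	TRACE_EVENT(sched_can_migrate_task,

		TP_PROTO(pid_t pid, int ret, int cache_hot,
			 int src_cpu, int dst_cpu),

		TP_ARGS(pid, ret, cache_hot, src_cpu, dst_cpu),

		TP_STRUCT__entry(
			__field(pid_t,	pid)
			__field(int,	ret)
			__field(int,	cache_hot)
			__field(int,	src_cpu)
			__field(int,	dst_cpu)
		),

		TP_fast_assign(
			__entry->pid		= pid;
			__entry->ret		= ret;
			__entry->cache_hot	= cache_hot;
			__entry->src_cpu	= src_cpu;
			__entry->dst_cpu	= dst_cpu;
		),

		TP_printk("pid=%d ret=%d cache_hot=%d src_cpu=%d dst_cpu=%d",
			  __entry->pid, __entry->ret, __entry->cache_hot,
			  __entry->src_cpu, __entry->dst_cpu)
	);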