In the old code the scheduler uses the util_avg signal to calculate the energy difference, which has the following drawbacks:
The previous CPU's util_avg retains the decayed value for a woken task, so naturally we subtract the woken task's util_avg from the CPU util_avg for the energy difference calculation. However, in some cases the CPU util_avg has decayed to 0 while the woken task's util_avg kept a large value before sleeping, so the calculation ultimately cannot reflect the energy decrease when the woken task is migrated away from this CPU.
This patch introduces the signal util_waken_avg for each CPU. It is based on Morten's patch ('sched/fair: Compute task/cpu utilization at wake-up more correctly'), so we can obtain every CPU's pure utilization value, with the task's retained util value completely removed; util_waken_avg represents this pure utilization. This provides a good basis for CPU utilization, so the scheduler can estimate a more accurate CPU utilization value: util_waken_avg + task_util(p). This improves the correctness of the energy calculation.
Signed-off-by: Leo Yan leo.yan@linaro.org --- include/linux/sched.h | 2 +- kernel/sched/fair.c | 27 ++++++++++++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index ad2c304..5b1c7d0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1280,7 +1280,7 @@ struct load_weight { struct sched_avg { u64 last_update_time, load_sum; u32 util_sum, period_contrib; - unsigned long load_avg, util_avg; + unsigned long load_avg, util_avg, util_waken_avg; };
#ifdef CONFIG_SCHEDSTATS diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d9a2969..6e7279c 100755 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5326,7 +5326,7 @@ struct energy_env { */ static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta) { - int util = __cpu_util(cpu, delta); + int util = cpu_rq(cpu)->cfs.avg.util_waken_avg + delta;
if (util >= capacity) return SCHED_CAPACITY_SCALE; @@ -5334,12 +5334,14 @@ static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta) return (util << SCHED_CAPACITY_SHIFT)/capacity; }
+static inline unsigned long task_util(struct task_struct *p); + static int calc_util_delta(struct energy_env *eenv, int cpu) { - if (cpu == eenv->src_cpu) - return -eenv->util_delta; - if (cpu == eenv->dst_cpu) - return eenv->util_delta; + if (cpu == eenv->src_cpu && !eenv->util_delta) + return task_util(eenv->task); + if (cpu == eenv->dst_cpu && eenv->util_delta) + return task_util(eenv->task); return 0; }
@@ -5351,7 +5353,7 @@ unsigned long group_max_util(struct energy_env *eenv)
for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) { delta = calc_util_delta(eenv, i); - max_util = max(max_util, __cpu_util(i, delta)); + max_util = max(max_util, cpu_rq(i)->cfs.avg.util_waken_avg + delta); }
return max_util; @@ -6325,9 +6327,15 @@ static int energy_aware_select_candidate_cpu(struct task_struct *p, task_util_boosted = boosted_task_util(p);
/* Find cpu with sufficient capacity */ - for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) { + for_each_cpu(i, sched_group_cpus(sg)) { + + wake_util = cpu_util_wake(i, p);
- wake_util = cpu_util(i); + /* update waken avg */ + cpu_rq(i)->cfs.avg.util_waken_avg = wake_util; + + if (unlikely(!cpumask_test_cpu(i, tsk_cpus_allowed(p)))) + continue;
/* * p's blocked utilization is still accounted for on prev_cpu @@ -6370,7 +6378,8 @@ static int energy_aware_select_candidate_cpu(struct task_struct *p, continue;
/* Keep previous CPU and pack tasks if possible */ - if (i == prev_cpu || wake_util > cpu_util(cpu)) + if (i == prev_cpu || + wake_util > cpu_rq(cpu)->cfs.avg.util_waken_avg) cpu = i; } } -- 1.9.1