In the previous code the energy calculation is CPU focused: the main idea is to compute how much power is consumed by the CPUs before and after a task migration. This inherently binds the energy comparison to the newly selected CPU versus the previous CPU, but in some cases the previous CPU is not an ideal CPU for the task; another shortcoming is that we never learn how much power the task consumes when it is placed on one specific CPU.
The more intuitive method is to make the energy calculation task oriented: compute the power consumption of the woken task on every possible CPU and select the CPU with the lowest power consumption.
This patch reworks the energy calculation to follow the task-oriented idea. To achieve this, it introduces a new struct task_energy to calculate the task's energy when placing it on a specific CPU; struct energy_env is still used to maintain the energy-comparison context between different CPUs, and it can still be used for the PE filter.
Signed-off-by: Leo Yan <leo.yan@linaro.org> --- kernel/sched/fair.c | 239 ++++++++++++++++++++++++++++------------------------ 1 file changed, 128 insertions(+), 111 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9370b5b..6833524 100755 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5285,31 +5285,41 @@ static inline bool energy_aware(void) }
struct energy_env { - cpumask_t search_cpus; - int target_cpu; + cpumask_t search_cpus; /* possible CPUs */ + int cpu_best; /* best CPU */ + int cpu_comp; /* compared CPU */ + + struct task_struct *task; /* waken task */ + int task_util; /* waken task util */
- struct sched_group *sg_top; - struct sched_group *sg_cap; - int cap_idx; - int util_delta; - int src_cpu; - int dst_cpu; - int energy; int payoff; - struct task_struct *task; + struct { - int before; - int after; + int best; + int comp; int delta; int diff; } nrg; + struct { - int before; - int after; + int best; + int comp; int delta; } cap; };
+struct task_energy { + int cpu; /* CPU */ + struct task_struct *task; /* waken task */ + int task_util; /* waken task util */ + + struct sched_group *sg_top; + struct sched_group *sg_cap; + int cap_idx; + int cap; + int nrg; +}; + /* * __cpu_norm_util() returns the cpu util relative to a specific capacity, * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for @@ -5336,23 +5346,22 @@ static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta)
static inline unsigned long task_util(struct task_struct *p);
-static int calc_util_delta(struct energy_env *eenv, int cpu) +static int calc_util_delta(struct task_energy *tsk_nrg, int cpu) { - if (cpu == eenv->src_cpu && !eenv->util_delta) - return task_util(eenv->task); - if (cpu == eenv->dst_cpu && eenv->util_delta) - return task_util(eenv->task); + if (cpu == tsk_nrg->cpu) + return tsk_nrg->task_util; + return 0; }
static -unsigned long group_max_util(struct energy_env *eenv) +unsigned long group_max_util(struct task_energy *tsk_nrg) { int i, delta; unsigned long max_util = 0;
- for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) { - delta = calc_util_delta(eenv, i); + for_each_cpu(i, sched_group_cpus(tsk_nrg->sg_cap)) { + delta = calc_util_delta(tsk_nrg, i); max_util = max(max_util, cpu_rq(i)->cfs.avg.util_waken_avg + delta); }
@@ -5369,14 +5378,14 @@ unsigned long group_max_util(struct energy_env *eenv) * estimate (more busy). */ static unsigned -long group_norm_util(struct energy_env *eenv, struct sched_group *sg) +long group_norm_util(struct task_energy *tsk_nrg, struct sched_group *sg) { int i, delta; unsigned long util_sum = 0; - unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap; + unsigned long capacity = sg->sge->cap_states[tsk_nrg->cap_idx].cap;
for_each_cpu(i, sched_group_cpus(sg)) { - delta = calc_util_delta(eenv, i); + delta = calc_util_delta(tsk_nrg, i); util_sum += __cpu_norm_util(i, capacity, delta); }
@@ -5427,16 +5436,16 @@ static int group_idle_state(struct sched_group *sg) * This can probably be done in a faster but more complex way. * Note: sched_group_energy() may fail when racing with sched_domain updates. */ -static int sched_group_energy(struct energy_env *eenv) +static int sched_group_energy(struct task_energy *tsk_nrg) { struct sched_domain *sd; int cpu, total_energy = 0; struct cpumask visit_cpus; struct sched_group *sg;
- WARN_ON(!eenv->sg_top->sge); + WARN_ON(!tsk_nrg->sg_top->sge);
- cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top)); + cpumask_copy(&visit_cpus, sched_group_cpus(tsk_nrg->sg_top));
while (!cpumask_empty(&visit_cpus)) { struct sched_group *sg_shared_cap = NULL; @@ -5465,30 +5474,21 @@ static int sched_group_energy(struct energy_env *eenv) int cap_idx, idle_idx;
if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight) - eenv->sg_cap = sg_shared_cap; + tsk_nrg->sg_cap = sg_shared_cap; else - eenv->sg_cap = sg; + tsk_nrg->sg_cap = sg;
- cap_idx = find_new_capacity(sg->sge, group_max_util(eenv)); - eenv->cap_idx = cap_idx; + cap_idx = find_new_capacity(sg->sge, group_max_util(tsk_nrg)); + tsk_nrg->cap_idx = cap_idx;
if (sg->group_weight == 1) { - /* Remove capacity of src CPU (before task move) */ - if (eenv->util_delta == 0 && - cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) { - eenv->cap.before = sg->sge->cap_states[cap_idx].cap; - eenv->cap.delta -= eenv->cap.before; - } - /* Add capacity of dst CPU (after task move) */ - if (eenv->util_delta != 0 && - cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) { - eenv->cap.after = sg->sge->cap_states[cap_idx].cap; - eenv->cap.delta += eenv->cap.after; + if (cpumask_test_cpu(tsk_nrg->cpu, sched_group_cpus(sg))) { + tsk_nrg->cap = sg->sge->cap_states[cap_idx].cap; } }
idle_idx = group_idle_state(sg); - group_util = group_norm_util(eenv, sg); + group_util = group_norm_util(tsk_nrg, sg); sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power); sg_idle_energy = ((SCHED_LOAD_SCALE-group_util) * sg->sge->idle_states[idle_idx].power); @@ -5498,7 +5498,7 @@ static int sched_group_energy(struct energy_env *eenv) if (!sd->child) cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
- if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top))) + if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(tsk_nrg->sg_top))) goto next_cpu;
} while (sg = sg->next, sg != sd->groups); @@ -5516,7 +5516,7 @@ next_cpu: continue; }
- eenv->energy = total_energy; + tsk_nrg->nrg = total_energy; return 0; }
@@ -5532,25 +5532,25 @@ static inline bool cpu_in_sg(struct sched_group *sg, int cpu) * utilization is removed from or added to the system (e.g. task wake-up). If * both are specified, the utilization is migrated. */ -static inline int __energy_diff(struct energy_env *eenv) +static inline int task_energy(struct energy_env *eenv) { struct sched_domain *sd; struct sched_group *sg; - int sd_cpu = -1, energy_before = 0, energy_after = 0; - int diff, margin; - - struct energy_env eenv_before = { - .util_delta = 0, - .src_cpu = eenv->src_cpu, - .dst_cpu = eenv->dst_cpu, - .nrg = { 0, 0, 0, 0}, - .cap = { 0, 0, 0 }, + int sd_cpu = -1; + + struct task_energy tsk_nrg = { + .cpu = eenv->cpu_comp, + .task = eenv->task, + .task_util = 0, };
- if (eenv->src_cpu == eenv->dst_cpu) - return 0; + struct task_energy tsk_nrg_after = { + .cpu = eenv->cpu_comp, + .task = eenv->task, + .task_util = eenv->task_util, + };
- sd_cpu = (eenv->src_cpu != -1) ? eenv->src_cpu : eenv->dst_cpu; + sd_cpu = eenv->cpu_comp; sd = rcu_dereference(per_cpu(sd_ea, sd_cpu));
if (!sd) @@ -5559,39 +5559,23 @@ static inline int __energy_diff(struct energy_env *eenv) sg = sd->groups;
do { - if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) { - eenv_before.sg_top = eenv->sg_top = sg; - - if (sched_group_energy(&eenv_before)) - return 0; /* Invalid result abort */ - energy_before += eenv_before.energy; - - /* Keep track of SRC cpu (before) capacity */ - eenv->cap.before = eenv_before.cap.before; - eenv->cap.delta = eenv_before.cap.delta; - - if (sched_group_energy(eenv)) - return 0; /* Invalid result abort */ - energy_after += eenv->energy; + if (cpu_in_sg(sg, tsk_nrg.cpu)) { + tsk_nrg.sg_top = sg; + tsk_nrg_after.sg_top = sg; + break; } } while (sg = sg->next, sg != sd->groups);
- eenv->nrg.before = energy_before; - eenv->nrg.after = energy_after; - eenv->nrg.diff = eenv->nrg.after - eenv->nrg.before; - eenv->payoff = 0; + if (sched_group_energy(&tsk_nrg)) + return 0; /* Invalid result abort */
- /* - * Dead-zone margin preventing too many migrations. - */ - - margin = eenv->nrg.before >> 6; /* ~1.56% */ - - diff = eenv->nrg.after - eenv->nrg.before; + if (sched_group_energy(&tsk_nrg_after)) + return 0; /* Invalid result abort */
- eenv->nrg.diff = (abs(diff) < margin) ? 0 : eenv->nrg.diff; + eenv->nrg.comp = tsk_nrg_after.nrg - tsk_nrg.nrg; + eenv->cap.comp = tsk_nrg_after.cap;
- return eenv->nrg.diff; + return 0; }
#ifdef CONFIG_SCHED_TUNE @@ -5633,18 +5617,18 @@ normalize_energy(int energy_diff) return (energy_diff < 0) ? -normalized_nrg : normalized_nrg; }
-static inline int -energy_diff(struct energy_env *eenv) +static inline int task_energy_diff(struct energy_env *eenv) { int boost = schedtune_task_boost(eenv->task); - int nrg_delta, ret; + int nrg_delta, diff;
/* Conpute "absolute" energy diff */ - __energy_diff(eenv); + eenv->nrg.diff = eenv->nrg.comp - eenv->nrg.best; + eenv->cap.delta = eenv->cap.comp - eenv->cap.best;
/* Return energy diff when boost margin is 0 */ if (boost == 0) { - ret = eenv->nrg.diff; + diff = eenv->nrg.diff; goto out; }
@@ -5665,18 +5649,34 @@ energy_diff(struct energy_env *eenv) * positive payoff, which is the condition for the acceptance of * a scheduling decision */ - ret = -eenv->payoff; + diff = -eenv->payoff;
out: trace_sched_energy_diff(eenv->task, - eenv->src_cpu, eenv->dst_cpu, eenv->util_delta, - eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff, - eenv->cap.before, eenv->cap.after, eenv->cap.delta, - eenv->nrg.delta, eenv->payoff); + eenv->cpu_best, eenv->cpu_comp, eenv->task_util, + eenv->nrg.best, eenv->nrg.comp, eenv->nrg.diff, + eenv->cap.best, eenv->cap.comp, eenv->cap.delta, + eenv->nrg.delta, eenv->payoff);
- return ret; + return diff; } #else /* CONFIG_SCHED_TUNE */ + +static inline int task_energy_diff(struct energy_env *eenv) +{ + /* Compute "absolute" energy diff */ + eenv->nrg.diff = eenv->nrg.comp - eenv->nrg.best; + eenv->cap.delta = eenv->cap.comp - eenv->cap.best; + + trace_sched_energy_diff(eenv->task, + eenv->cpu_best, eenv->cpu_comp, eenv->task_util, + eenv->nrg.best, eenv->nrg.comp, eenv->nrg.diff, + eenv->cap.best, eenv->cap.comp, eenv->cap.delta, + eenv->nrg.delta, eenv->payoff); + + return eenv->nrg.diff; +} + #define energy_diff(eenv) __energy_diff(eenv) #endif
@@ -6527,39 +6527,56 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync find_nrg_efficient_target(p, sd, &eenv); }
- eenv.target_cpu = -1; + eenv.cpu_best = -1; + eenv.cpu_comp = -1; + eenv.task = p; + eenv.task_util = task_util(p); + eenv.payoff = 0; + + /* directly return for only one CPU case */ + if (cpumask_weight(&eenv.search_cpus) == 1) { + target_cpu = cpumask_first(&eenv.search_cpus); + goto unlock; + }
for_each_cpu(cpu, &eenv.search_cpus) {
- if (eenv.target_cpu == -1) { - eenv.target_cpu = cpu; + if (eenv.cpu_best == -1) { + eenv.cpu_best = cpu; + eenv.cpu_comp = cpu; + + task_energy(&eenv); + + /* init energy data */ + eenv.nrg.best = eenv.nrg.comp; + eenv.cap.best = eenv.cap.comp; continue; }
if (unlikely(!task_util(p))) { - if (capacity_orig_of(cpu) < capacity_orig_of(eenv.target_cpu)) - eenv.target_cpu = cpu; + if (capacity_orig_of(cpu) < capacity_orig_of(eenv.cpu_best)) + eenv.cpu_best = cpu;
continue; }
- eenv.util_delta = task_util(p); - eenv.src_cpu = eenv.target_cpu; - eenv.dst_cpu = cpu; - eenv.task = p; - - if (energy_diff(&eenv) < 0) - eenv.target_cpu = cpu; + eenv.cpu_comp = cpu; + task_energy(&eenv); + if (task_energy_diff(&eenv) < 0) { + eenv.cpu_best = cpu; + eenv.nrg.best = eenv.nrg.comp; + eenv.cap.best = eenv.cap.comp; + } }
- if (eenv.target_cpu == -1) { + if (eenv.cpu_best == -1) { schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav); schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav); target_cpu = prev_cpu; } else { schedstat_inc(p, se.statistics.nr_wakeups_secb_nrg_sav); schedstat_inc(this_rq(), eas_stats.secb_nrg_sav); - target_cpu = eenv.target_cpu; + target_cpu = eenv.cpu_best; }
schedstat_inc(p, se.statistics.nr_wakeups_secb_count); -- 1.9.1