During energy calculation, it iterates cpumask bits according to eenv:sg_top; there have two race conditions between energy calculation flow with CPU hotplug flow, so in the middle of calculation any CPU might be hotplugged in or out.
This patch checks two conditions to confirm if hit any hotplug race condition, and if hit then directly bail out with returning error value. During calculation but haven't finished all CPUs iteration, if cpu_count is zero this means there have some CPU is concurrently hotplugged in system, so this is one race condition; after calculation has accomplished, we should expect cpu_count is zero so means all CPU has been traversed one by one, otherwise it means some CPUs have been hotplugged out and as result it has skipped for calculation, this is for detecting the second race condition. This patch refactors compute_energy() by using these two race conditions checking, and removed redundant variables.
Change-Id: I8dadea2229dabb8ddc60597978f724ccaae61311 Signed-off-by: Leo Yan leo.yan@linaro.org --- kernel/sched/fair.c | 102 +++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 58 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e5d5ed5..49eee75 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5719,83 +5719,69 @@ static void calc_sg_energy(struct energy_env *eenv, int cpu) */ static int compute_energy(struct energy_env *eenv, int candidate) { - struct cpumask visit_cpus; int cpu_count; + int cpu; + struct sched_domain *sd;
WARN_ON(!eenv->sg_top->sge);
- cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top)); /* If a cpu is hotplugged in while we are in this function, - * it does not appear in the existing visit_cpus mask + * it does not appear in the existing eenv::sg_top mask * which came from the sched_group pointer of the * sched_domain pointed at by sd_ea for either the prev * or next cpu and was dereferenced in __energy_diff. - * Since we will dereference sd_scs later as we iterate + * Since we will dereference sd later as we iterate * through the CPUs we expect to visit, new CPUs can - * be present which are not in the visit_cpus mask. + * be present which are not in the eenv::sg_top mask. * Guard this with cpu_count. */ - cpu_count = cpumask_weight(&visit_cpus); + cpu_count = cpumask_weight(sched_group_cpus(eenv->sg_top)); + cpu = cpumask_first(sched_group_cpus(eenv->sg_top));
- while (!cpumask_empty(&visit_cpus)) { - int cpu = cpumask_first(&visit_cpus); - struct sched_domain *sd; + for_each_domain(cpu, sd) { + struct sched_group *sg = sd->groups;
- for_each_domain(cpu, sd) { - struct sched_group *sg = sd->groups; + do { + if (!cpumask_intersects(sched_group_cpus(eenv->sg_top), + sched_group_cpus(sg))) + continue;
- /* Has this sched_domain already been visited? */ - if (sd->child && group_first_cpu(sg) != cpu) - break; + /* + * Compute the energy for all the candidate + * CPUs in the current visited SG. + */ + eenv->sg = sg; + calc_sg_energy(eenv, candidate);
- do { + if (!sd->child) { /* - * Compute the energy for all the candidate - * CPUs in the current visited SG. + * cpu_count here is the number of cpus we + * expect to visit in this calculation. If + * we race against hotplug, we can have extra + * cpus added to the groups we are iterating + * which do not appear in the eenv::sg_top mask. + * In that case we are not able to calculate + * energy without restarting so we will bail + * out and use prev_cpu this time. */ - eenv->sg = sg; - calc_sg_energy(eenv, candidate); - - if (!sd->child) { - /* - * cpu_count here is the number of - * cpus we expect to visit in this - * calculation. If we race against - * hotplug, we can have extra cpus - * added to the groups we are - * iterating which do not appear in - * the visit_cpus mask. In that case - * we are not able to calculate energy - * without restarting so we will bail - * out and use prev_cpu this time. - */ - if (!cpu_count) - return -EINVAL; - cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg)); - cpu_count--; - } - - if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top))) - goto next_cpu; - - } while (sg = sg->next, sg != sd->groups); - } - - /* - * If we raced with hotplug and got an sd NULL-pointer; - * returning a wrong energy estimation is better than - * entering an infinite loop. - * Specifically: If a cpu is unplugged after we took - * the visit_cpus mask, it no longer has an sd_scs - * pointer, so when we dereference it, we get NULL. - */ - if (cpumask_test_cpu(cpu, &visit_cpus)) - return -EINVAL; -next_cpu: - cpumask_clear_cpu(cpu, &visit_cpus); - continue; + if (!cpu_count) + return -EINVAL; + cpu_count--; + } + } while (sg = sg->next, sg != sd->groups); }
+ /* + * If we raced with hotplug and got an sd NULL-pointer; + * returning a wrong energy estimation is better than + * entering an infinite loop. + * Specifically: If a cpu is unplugged after we took + * the eenv::sg_top mask, it no longer has an sd pointer, + * so when we dereference it, we get NULL. + */ + if (cpu_count) + return -EINVAL; + return 0; }
-- 1.9.1