On 20/02/17 20:24, Dietmar Eggemann wrote:
[...]
On 16/02/17 20:33, Thara Gopinath wrote:
[...]
@@ -7895,14 +7940,45 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* update overload indicator if we are at root domain */ if (env->dst_rq->rd->overload != overload) env->dst_rq->rd->overload = overload;
- }
/* Update over-utilization (tipping point, U >= 0) indicator */
if (env->dst_rq->rd->overutilized != overutilized)
env->dst_rq->rd->overutilized = overutilized;
- } else {
if (!env->dst_rq->rd->overutilized && overutilized)
env->dst_rq->rd->overutilized = true;
- if (overutilized)
set_sd_overutilized(env->sd);
- else
clear_sd_overutilized(env->sd);
- /*
* If there is a misfit task in one cpu in this sched_domain
* it is likely that the imbalance cannot be sorted out among
* the cpu's in this sched_domain. In this case set the
* overutilized flag at the parent sched_domain.
*/
- if (misfit_task) {
sd = env->sd->parent;
/*
* In case of a misfit task, load balance at the parent
* sched domain level will make sense only if the cpus
* have a different capacity. If cpus at a domain level have
* the same capacity, the misfit task cannot be well
* accommodated in any of the cpus and there is no point in
* trying a load balance at this level
*/
while (sd) {
if (sd->flags & SD_ASYM_CPUCAPACITY) {
set_sd_overutilized(sd);
break;
}
sd = sd->parent;
}}
- /* If the domain util is greater than domain capacity, load balancing
* needs to be done at the next sched domain level as well
*/
- if (sds->total_capacity * 1024 < sds->total_util * capacity_margin)
set_sd_overutilized(env->sd->parent);
sched_domain_shared::overutilized is potentially set twice at the DIE level: once for the misfit task and once for over-utilization. This could be avoided by rearranging this code a little bit. Since this is the normal topology layout for today's big.LITTLE systems (SD_ASYM_CPUCAPACITY set at the DIE level, and an MC sd level with sd->child == NULL and sd_mc->parent == sd_die), it would be nice to change this.
Maybe something like this? Only lightly tested on Pixel (MC-DIE).
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index abd9dfa3f1ce..e2ff672cea52 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7817,7 +7817,7 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq) */ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds) { - struct sched_domain *child = env->sd->child, *sd; + struct sched_domain *child = env->sd->child, *sd = env->sd; struct sched_group *sg = env->sd->groups; struct sg_lb_stats tmp_sgs; int load_idx, prefer_sibling = 0; @@ -7902,32 +7902,34 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd * the cpu's in this sched_domain. In this case set the * overutilized flag at the parent sched_domain. */ - if (misfit_task) { + while (sd = sd->parent, sd) {
- sd = env->sd->parent; + /* + * If the domain util is greater that domain capacity, + * load balancing needs to be done at the next sched + * domain level as well + */ + if ((sd->child == env->sd) && + (sds->total_capacity * 1024 < + sds->total_util * capacity_margin)) { + set_sd_overutilized(sd); + if (sd->flags & SD_ASYM_CPUCAPACITY) + break; + }
/* * In case of a misfit task, load balance at the parent - * sched domain level will make sense only if the the cpus - * have a different capacity. If cpus at a domain level have - * the same capacity, the misfit task cannot be well - * accomodated in any of the cpus and there in no point in - * trying a load balance at this level + * sched domain level will make sense only if the the + * cpus have a different capacity. If cpus at a domain + * level have the same capacity, the misfit task cannot + * be well accomodated in any of the cpus and there in + * no point in trying a load balance at this level */ - while (sd) { - if (sd->flags & SD_ASYM_CPUCAPACITY) { - set_sd_overutilized(sd); - break; - } - sd = sd->parent; + if (misfit_task && sd->flags & SD_ASYM_CPUCAPACITY) { + set_sd_overutilized(sd); + break; } } - - /* If the domain util is greater that domain capacity, load balancing - * needs to be done at the next sched domain level as well