On 20/02/17 20:24, Dietmar Eggemann wrote:
[...]
On 16/02/17 20:33, Thara Gopinath wrote:
[...]
@@ -7895,14 +7940,45 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* update overload indicator if we are at root domain */ if (env->dst_rq->rd->overload != overload) env->dst_rq->rd->overload = overload;
- }
/* Update over-utilization (tipping point, U >= 0) indicator */if (env->dst_rq->rd->overutilized != overutilized)env->dst_rq->rd->overutilized = overutilized;- } else {
if (!env->dst_rq->rd->overutilized && overutilized)env->dst_rq->rd->overutilized = true;
- if (overutilized)
set_sd_overutilized(env->sd);- else
clear_sd_overutilized(env->sd);- /*
* If there is a misfit task in one cpu in this sched_domain* it is likely that the imbalance cannot be sorted out among* the cpu's in this sched_domain. In this case set the* overutilized flag at the parent sched_domain.*/- if (misfit_task) {
sd = env->sd->parent;/** In case of a misfit task, load balance at the parent* sched domain level will make sense only if the the cpus* have a different capacity. If cpus at a domain level have* the same capacity, the misfit task cannot be well* accomodated in any of the cpus and there in no point in* trying a load balance at this level*/while (sd) {if (sd->flags & SD_ASYM_CPUCAPACITY) {set_sd_overutilized(sd);break;}sd = sd->parent; }}- /* If the domain util is greater that domain capacity, load balancing
* needs to be done at the next sched domain level as well*/- if (sds->total_capacity * 1024 < sds->total_util * capacity_margin)
set_sd_overutilized(env->sd->parent);

sched_domain_shared::overutilized is potentially set twice on the DIE level: once for the misfit case and once for the over-utilization case. This could be avoided by rearranging this code a little. Since this is the normal topology layout for today's big.LITTLE systems (SD_ASYM_CPUCAPACITY set on the DIE level, and an MC sd level with sd->child == NULL and sd_mc->parent == sd_die), this would be nice to change.
Maybe something like this? It has only been lightly tested on Pixel (MC-DIE topology).
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index abd9dfa3f1ce..e2ff672cea52 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7817,7 +7817,7 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq) */ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds) { - struct sched_domain *child = env->sd->child, *sd; + struct sched_domain *child = env->sd->child, *sd = env->sd; struct sched_group *sg = env->sd->groups; struct sg_lb_stats tmp_sgs; int load_idx, prefer_sibling = 0; @@ -7902,32 +7902,34 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd * the cpu's in this sched_domain. In this case set the * overutilized flag at the parent sched_domain. */ - if (misfit_task) { + while (sd = sd->parent, sd) {
- sd = env->sd->parent; + /* + * If the domain util is greater that domain capacity, + * load balancing needs to be done at the next sched + * domain level as well + */ + if ((sd->child == env->sd) && + (sds->total_capacity * 1024 < + sds->total_util * capacity_margin)) { + set_sd_overutilized(sd); + if (sd->flags & SD_ASYM_CPUCAPACITY) + break; + }
/* * In case of a misfit task, load balance at the parent - * sched domain level will make sense only if the the cpus - * have a different capacity. If cpus at a domain level have - * the same capacity, the misfit task cannot be well - * accomodated in any of the cpus and there in no point in - * trying a load balance at this level + * sched domain level will make sense only if the the + * cpus have a different capacity. If cpus at a domain + * level have the same capacity, the misfit task cannot + * be well accomodated in any of the cpus and there in + * no point in trying a load balance at this level */ - while (sd) { - if (sd->flags & SD_ASYM_CPUCAPACITY) { - set_sd_overutilized(sd); - break; - } - sd = sd->parent; + if (misfit_task && sd->flags & SD_ASYM_CPUCAPACITY) { + set_sd_overutilized(sd); + break; } } - - /* If the domain util is greater that domain capacity, load balancing - * needs to be done at the next sched domain level as well