2016-09-12 15:47 GMT+08:00 Vincent Guittot vincent.guittot@linaro.org:
When a task moves from/to a cfs_rq, we set a flag which is then used to propagate the change at the parent level (sched_entity and cfs_rq) during the next update. If the cfs_rq is throttled, the flag will stay pending until the cfs_rq is unthrottled.
For propagating the utilization, we copy the utilization of child cfs_rq to the sched_entity.
For propagating the load, we have to take into account the load of the whole task group in order to evaluate the load of the sched_entity. Similarly to what was done before the rewrite of PELT, we add a correction factor in case the task group's load is less than its share so that it will contribute the same load as a task of equal weight.
Signed-off-by: Vincent Guittot vincent.guittot@linaro.org
kernel/sched/fair.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++- kernel/sched/sched.h | 1 + 2 files changed, 170 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0aa1d7d..e4015f6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3017,6 +3017,132 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) } }
+#ifdef CONFIG_FAIR_GROUP_SCHED +/* Take into account change of utilization of a child task group */ +static inline void +update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se) +{
struct cfs_rq *gcfs_rq = group_cfs_rq(se);long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;/* Nothing to update */if (!delta)return;/* Set new sched_entity's utilizaton */
s/utilizaton/utilization
se->avg.util_avg = gcfs_rq->avg.util_avg;se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;/* Update parent cfs_rq utilization */cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg + delta, 0);cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;+}
+/* Take into account change of load of a child task group */ +static inline void +update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se) +{
struct cfs_rq *gcfs_rq = group_cfs_rq(se);long delta, load = gcfs_rq->avg.load_avg;/* If the load of group cfs_rq is null, the load of the* sched_entity will also be null so we can skip the formula*/if (load) {long tg_load;/* Get tg's load and ensure tg_load > 0 */tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1;/* Ensure tg_load >= load and updated with current load*/tg_load -= gcfs_rq->tg_load_avg_contrib;tg_load += load;/* scale gcfs_rq's load into tg's shares*/load *= scale_load_down(gcfs_rq->tg->shares);load /= tg_load;/** we need to compute a correction term in the case that the* task group is consuming <1 cpu so that we would contribute* the same load as a task of equal weight.*/if (tg_load < scale_load_down(gcfs_rq->tg->shares)) {load *= tg_load;load /= scale_load_down(gcfs_rq->tg->shares);}}delta = load - se->avg.load_avg;/* Nothing to update */if (!delta)return;/* Set new sched_entity's load */se->avg.load_avg = load;se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;/* Update parent cfs_rq load */cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg + delta, 0);cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX;+}
+static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) +{
/* set cfs_rq's flag */cfs_rq->propagate_avg = 1;+}
+static inline int test_and_clear_tg_cfs_propagate(struct sched_entity *se) +{
/* Get my cfs_rq */struct cfs_rq *cfs_rq = group_cfs_rq(se);/* Nothing to propagate */if (!cfs_rq->propagate_avg)return 0;/* Clear my cfs_rq's flag */cfs_rq->propagate_avg = 0;return 1;+}
+/* Update task and its cfs_rq load average */ +static inline int propagate_entity_load_avg(struct sched_entity *se) +{
struct cfs_rq *cfs_rq;if (entity_is_task(se))return 0;if (!test_and_clear_tg_cfs_propagate(se))return 0;/* Get parent cfs_rq */cfs_rq = cfs_rq_of(se);/* Propagate to parent */set_tg_cfs_propagate(cfs_rq);/* Update utilization */update_tg_cfs_util(cfs_rq, se);/* Update load */update_tg_cfs_load(cfs_rq, se);return 1;+} +#else +static inline int propagate_entity_load_avg(struct sched_entity *se) +{
return 0;+}
+static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {} +#endif
/*
- Unsigned subtract and clamp on underflow.
@@ -3093,6 +3219,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg, u64 now = cfs_rq_clock_task(cfs_rq); struct rq *rq = rq_of(cfs_rq); int cpu = cpu_of(rq);
int decayed; /* * Track task load average for carrying it to new CPU after migrated, and@@ -3103,7 +3230,11 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg, se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
decayed = update_cfs_rq_load_avg(now, cfs_rq, true);decayed |= propagate_entity_load_avg(se);if (decayed && update_tg) update_tg_load_avg(cfs_rq, 0);}
@@ -3122,6 +3253,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s cfs_rq->avg.load_sum += se->avg.load_sum; cfs_rq->avg.util_avg += se->avg.util_avg; cfs_rq->avg.util_sum += se->avg.util_sum;
set_tg_cfs_propagate(cfs_rq); cfs_rq_util_change(cfs_rq);} @@ -3141,6 +3273,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum); sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg); sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
set_tg_cfs_propagate(cfs_rq); cfs_rq_util_change(cfs_rq);} @@ -8499,6 +8632,22 @@ static void detach_task_cfs_rq(struct task_struct *p) update_load_avg(se, 0, 0); detach_entity_load_avg(cfs_rq, se); update_tg_load_avg(cfs_rq, false);
+#ifdef CONFIG_FAIR_GROUP_SCHED
/** Propagate the detach across the tg tree to make it visible to the* root*/se = se->parent;for_each_sched_entity(se) {cfs_rq = cfs_rq_of(se);if (cfs_rq_throttled(cfs_rq))break;update_load_avg(se, 1, 0);}+#endif }
static void attach_entity_cfs_rq(struct sched_entity *se) @@ -8517,6 +8666,22 @@ static void attach_entity_cfs_rq(struct sched_entity *se) update_load_avg(se, 0, !sched_feat(ATTACH_AGE_LOAD)); attach_entity_load_avg(cfs_rq, se); update_tg_load_avg(cfs_rq, false);
+#ifdef CONFIG_FAIR_GROUP_SCHED
/** Propagate the attach across the tg tree to make it visible to the* root*/se = se->parent;for_each_sched_entity(se) {cfs_rq = cfs_rq_of(se);if (cfs_rq_throttled(cfs_rq))break;update_load_avg(se, 1, 0);}+#endif }
static void attach_task_cfs_rq(struct task_struct *p) @@ -8578,6 +8743,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; #endif #ifdef CONFIG_SMP +#ifdef CONFIG_FAIR_GROUP_SCHED
cfs_rq->propagate_avg = 0;+#endif atomic_long_set(&cfs_rq->removed_load_avg, 0); atomic_long_set(&cfs_rq->removed_util_avg, 0); #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 483616a..0517a9e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -397,6 +397,7 @@ struct cfs_rq { unsigned long runnable_load_avg; #ifdef CONFIG_FAIR_GROUP_SCHED unsigned long tg_load_avg_contrib;
unsigned long propagate_avg;#endif atomic_long_t removed_load_avg, removed_util_avg;
#ifndef CONFIG_64BIT
1.9.1