2016-09-12 15:47 GMT+08:00 Vincent Guittot <vincent.guittot@linaro.org>:
When a task moves from/to a cfs_rq, we set a flag which is then used to propagate the change at the parent level (sched_entity and cfs_rq) during the next update. If the cfs_rq is throttled, the flag will stay pending until the cfs_rq is unthrottled.
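Just to check my understanding of the mechanism, here is a toy userspace model of the bookkeeping as I read it (the names mirror the patch, but this is obviously not the kernel code, and it ignores utilization and the shares scaling):

#include <stdio.h>

struct toy_cfs_rq {
        int propagate_avg;
        long load_avg;
};

struct toy_se {
        long load_avg;
        struct toy_cfs_rq *cfs_rq;      /* the cfs_rq this se is queued on */
        struct toy_cfs_rq *my_q;        /* the group cfs_rq owned by this se */
};

static void set_tg_cfs_propagate(struct toy_cfs_rq *cfs_rq)
{
        cfs_rq->propagate_avg = 1;
}

/* what update_load_avg() of a group se does, one level at a time */
static void propagate_one_level(struct toy_se *gse)
{
        struct toy_cfs_rq *gcfs_rq = gse->my_q;
        struct toy_cfs_rq *cfs_rq = gse->cfs_rq;
        long delta;

        if (!gcfs_rq->propagate_avg)    /* test_and_clear_tg_cfs_propagate() */
                return;
        gcfs_rq->propagate_avg = 0;

        set_tg_cfs_propagate(cfs_rq);   /* re-arm the flag one level up */

        delta = gcfs_rq->load_avg - gse->load_avg;
        gse->load_avg = gcfs_rq->load_avg;      /* ignoring the shares scaling */
        cfs_rq->load_avg += delta;
}

int main(void)
{
        struct toy_cfs_rq root = { 0, 1024 };
        struct toy_cfs_rq gq = { 0, 512 };
        struct toy_se gse = { 512, &root, &gq };

        gq.load_avg -= 512;             /* a task is detached from the group cfs_rq */
        set_tg_cfs_propagate(&gq);      /* ...which only sets the flag */

        propagate_one_level(&gse);      /* consumed at the next update of the group se */
        printf("root: load_avg=%ld propagate_avg=%d\n", root.load_avg, root.propagate_avg);
        return 0;
}

so a throttled level simply never runs the "consume" step, and the flag stays set until the cfs_rq is unthrottled.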
For propagating the utilization, we copy the utilization of the child cfs_rq to the sched_entity.
For propagating the load, we have to take into account the load of the whole task group in order to evaluate the load of the sched_entity. Similar to what was done before the rewrite of PELT, we add a correction factor when the task group's load is less than its shares, so that it contributes the same load as a task of equal weight.
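If I read update_tg_cfs_load() below correctly, ignoring the correction for a moment the group se ends up contributing roughly

        se->avg.load_avg = gcfs_rq->avg.load_avg * scale_load_down(tg->shares) / tg_load

where tg_load is the task group's total load refreshed with gcfs_rq's current contribution. When tg_load < shares (the group consumes less than one CPU), the correction multiplies back by tg_load / shares, which cancels the scaling and leaves the se carrying gcfs_rq's own load, like a single task of that weight would, instead of being inflated to the full shares value.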
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/fair.c  | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h |   1 +
 2 files changed, 170 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0aa1d7d..e4015f6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3017,6 +3017,132 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
         }
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/* Take into account change of utilization of a child task group */
+static inline void
+update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+        struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+        long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
+
+        /* Nothing to update */
+        if (!delta)
+                return;
+
+        /* Set new sched_entity's utilizaton */
s/utilizaton/utilization
+        se->avg.util_avg = gcfs_rq->avg.util_avg;
+        se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+
+        /* Update parent cfs_rq utilization */
+        cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg + delta, 0);
+        cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+}
+
+/* Take into account change of load of a child task group */
+static inline void
+update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+        struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+        long delta, load = gcfs_rq->avg.load_avg;
+
+        /* If the load of group cfs_rq is null, the load of the
+         * sched_entity will also be null so we can skip the formula
+         */
+        if (load) {
+                long tg_load;
+
+                /* Get tg's load and ensure tg_load > 0 */
+                tg_load = atomic_long_read(&gcfs_rq->tg->load_avg) + 1;
+
+                /* Ensure tg_load >= load and updated with current load*/
+                tg_load -= gcfs_rq->tg_load_avg_contrib;
+                tg_load += load;
+
+                /* scale gcfs_rq's load into tg's shares*/
+                load *= scale_load_down(gcfs_rq->tg->shares);
+                load /= tg_load;
+
+                /*
+                 * we need to compute a correction term in the case that the
+                 * task group is consuming <1 cpu so that we would contribute
+                 * the same load as a task of equal weight.
+                 */
+                if (tg_load < scale_load_down(gcfs_rq->tg->shares)) {
+                        load *= tg_load;
+                        load /= scale_load_down(gcfs_rq->tg->shares);
+                }
+        }
+
+        delta = load - se->avg.load_avg;
+
+        /* Nothing to update */
+        if (!delta)
+                return;
+
+        /* Set new sched_entity's load */
+        se->avg.load_avg = load;
+        se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;
+
+        /* Update parent cfs_rq load */
+        cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg + delta, 0);
+        cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * LOAD_AVG_MAX;
+}
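A quick check of the arithmetic with made-up numbers, assuming shares = 1024: if the whole tg has tg_load = 2048 and this gcfs_rq contributes load = 512, we get 512 * 1024 / 2048 = 256 and the correction is skipped, i.e. a quarter of the group's weight as expected. If instead tg_load = 512 = load (this is the only runnable part of the group and it uses less than one CPU), the first step gives 512 * 1024 / 512 = 1024, and since tg_load < shares the correction scales it back to 1024 * 512 / 1024 = 512, so the se ends up carrying exactly the child cfs_rq's load.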
+
+static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq)
+{
+        /* set cfs_rq's flag */
+        cfs_rq->propagate_avg = 1;
+}
+
+static inline int test_and_clear_tg_cfs_propagate(struct sched_entity *se)
+{
+        /* Get my cfs_rq */
+        struct cfs_rq *cfs_rq = group_cfs_rq(se);
+
+        /* Nothing to propagate */
+        if (!cfs_rq->propagate_avg)
+                return 0;
+
+        /* Clear my cfs_rq's flag */
+        cfs_rq->propagate_avg = 0;
+
+        return 1;
+}
+
+/* Update task and its cfs_rq load average */
+static inline int propagate_entity_load_avg(struct sched_entity *se)
+{
+        struct cfs_rq *cfs_rq;
+
+        if (entity_is_task(se))
+                return 0;
+
+        if (!test_and_clear_tg_cfs_propagate(se))
+                return 0;
+
+        /* Get parent cfs_rq */
+        cfs_rq = cfs_rq_of(se);
+
+        /* Propagate to parent */
+        set_tg_cfs_propagate(cfs_rq);
+
+        /* Update utilization */
+        update_tg_cfs_util(cfs_rq, se);
+
+        /* Update load */
+        update_tg_cfs_load(cfs_rq, se);
+
+        return 1;
+}
+#else
+static inline int propagate_entity_load_avg(struct sched_entity *se)
+{
+        return 0;
+}
+
+static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
+#endif
+
 /*
  * Unsigned subtract and clamp on underflow.
@@ -3093,6 +3219,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg,
         u64 now = cfs_rq_clock_task(cfs_rq);
         struct rq *rq = rq_of(cfs_rq);
         int cpu = cpu_of(rq);
+        int decayed;
 
         /*
          * Track task load average for carrying it to new CPU after migrated, and
@@ -3103,7 +3230,11 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg,
                           se->on_rq * scale_load_down(se->load.weight),
                           cfs_rq->curr == se, NULL);
 
-        if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
+        decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
+
+        decayed |= propagate_entity_load_avg(se);
+
+        if (decayed && update_tg)
                 update_tg_load_avg(cfs_rq, 0);
 }
@@ -3122,6 +3253,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
         cfs_rq->avg.load_sum += se->avg.load_sum;
         cfs_rq->avg.util_avg += se->avg.util_avg;
         cfs_rq->avg.util_sum += se->avg.util_sum;
+        set_tg_cfs_propagate(cfs_rq);
 
         cfs_rq_util_change(cfs_rq);
 }
@@ -3141,6 +3273,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
         sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
         sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
         sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+        set_tg_cfs_propagate(cfs_rq);
 
         cfs_rq_util_change(cfs_rq);
 }
@@ -8499,6 +8632,22 @@ static void detach_task_cfs_rq(struct task_struct *p)
         update_load_avg(se, 0, 0);
         detach_entity_load_avg(cfs_rq, se);
         update_tg_load_avg(cfs_rq, false);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+        /*
+         * Propagate the detach across the tg tree to make it visible to the
+         * root
+         */
+        se = se->parent;
+        for_each_sched_entity(se) {
+                cfs_rq = cfs_rq_of(se);
+
+                if (cfs_rq_throttled(cfs_rq))
+                        break;
+
+                update_load_avg(se, 1, 0);
+        }
+#endif
 }
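To make sure I follow the walk here (and the twin loop in the attach path below): with a hierarchy root <- A <- B and a task detached from B's cfs_rq, detach_entity_load_avg() has already flagged B's cfs_rq via set_tg_cfs_propagate(). The loop then starts at B's group se: its update_load_avg() consumes B's flag in propagate_entity_load_avg(), applies the delta to A's cfs_rq and re-arms A's flag, and the next iteration does the same from A's group se into the root cfs_rq. If the cfs_rq a group se sits on is throttled, we break out before updating that se, so the child's propagate_avg stays set and the pending delta is presumably picked up once the cfs_rq is unthrottled, as described in the changelog.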
 
 static void attach_entity_cfs_rq(struct sched_entity *se)
@@ -8517,6 +8666,22 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
         update_load_avg(se, 0, !sched_feat(ATTACH_AGE_LOAD));
         attach_entity_load_avg(cfs_rq, se);
         update_tg_load_avg(cfs_rq, false);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+        /*
+         * Propagate the attach across the tg tree to make it visible to the
+         * root
+         */
+        se = se->parent;
+        for_each_sched_entity(se) {
+                cfs_rq = cfs_rq_of(se);
+
+                if (cfs_rq_throttled(cfs_rq))
+                        break;
+
+                update_load_avg(se, 1, 0);
+        }
+#endif
 }
 
 static void attach_task_cfs_rq(struct task_struct *p)
@@ -8578,6 +8743,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
         cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
 #endif
 #ifdef CONFIG_SMP
+#ifdef CONFIG_FAIR_GROUP_SCHED
+        cfs_rq->propagate_avg = 0;
+#endif
         atomic_long_set(&cfs_rq->removed_load_avg, 0);
         atomic_long_set(&cfs_rq->removed_util_avg, 0);
 #endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 483616a..0517a9e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -397,6 +397,7 @@ struct cfs_rq {
         unsigned long runnable_load_avg;
 #ifdef CONFIG_FAIR_GROUP_SCHED
         unsigned long tg_load_avg_contrib;
+        unsigned long propagate_avg;
 #endif
         atomic_long_t removed_load_avg, removed_util_avg;
 #ifndef CONFIG_64BIT
1.9.1