The current implementation of overutilization aborts energy-aware
scheduling if any CPU in the system is over-utilized. This patch
introduces an over-utilization flag per sched-domain level instead of a
single system-wide flag. Load balancing is done at a sched domain if
any of its CPUs is over-utilized. If energy-aware scheduling is enabled
and no CPU in a sched domain is over-utilized, load balancing is
skipped for that sched domain and energy-aware scheduling continues at
that level.
The implementation takes advantage of the sched_domain_shared structure
that is common across all the sched domains at a level. The new flag is
placed in this structure so that all the sched domains at the same
level share it. When a CPU becomes over-utilized, the flag is set at
the level-1 sched_domain. The flag at the parent sched-domain level is
set in either of the following two scenarios:
1. There is a misfit task on one of the CPUs in this sched_domain.
2. The total utilization of the domain is greater than the domain
   capacity.
The flag is cleared if no CPU in a sched domain is over-utilized.
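For reference, below is a minimal sketch of the tipping-point test
applied to the domain totals in scenario 2. It is illustrative only: it
assumes the usual capacity_margin default of 1280 (roughly an 80%
utilization threshold) and simply mirrors the check added in
update_sd_lb_stats() in this patch.

/* Not part of the patch: worked example of the domain-level check. */
static unsigned int capacity_margin = 1280;	/* assumed default, ~80% */

static bool sd_above_tipping_point(unsigned long total_capacity,
				   unsigned long total_util)
{
	/*
	 * e.g. two 512-capacity CPUs -> total_capacity = 1024; with a
	 * summed utilization of 900, 1024 * 1024 = 1048576 is less than
	 * 900 * 1280 = 1152000, so the parent domain's flag gets set.
	 */
	return total_capacity * 1024 < total_util * capacity_margin;
}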
Signed-off-by: Thara Gopinath <thara.gopinath(a)linaro.org>
---
include/linux/sched.h | 1 +
kernel/sched/core.c | 7 ++-
kernel/sched/fair.c | 120 +++++++++++++++++++++++++++++++++++++++-----------
3 files changed, 99 insertions(+), 29 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c5122e..971842a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1112,6 +1112,7 @@ struct sched_domain_shared {
atomic_t ref;
atomic_t nr_busy_cpus;
int has_idle_cores;
+ bool overutilized;
};
struct sched_domain {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 31a466f..e0a8758 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6659,11 +6659,10 @@ sd_init(struct sched_domain_topology_level *tl,
* For all levels sharing cache; connect a sched_domain_shared
* instance.
*/
- if (sd->flags & SD_SHARE_PKG_RESOURCES) {
- sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
- atomic_inc(&sd->shared->ref);
+ sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
+ atomic_inc(&sd->shared->ref);
+ if (sd->flags & SD_SHARE_PKG_RESOURCES)
atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
- }
sd->private = sdd;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 489f6d3..485f597 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4735,6 +4735,30 @@ static inline void hrtick_update(struct rq *rq)
static bool cpu_overutilized(int cpu);
+static bool
+is_sd_overutilized(struct sched_domain *sd)
+{
+ if (sd)
+ return sd->shared->overutilized;
+ else
+ return false;
+}
+
+static void
+set_sd_overutilized(struct sched_domain *sd)
+{
+ if (sd)
+ sd->shared->overutilized = true;
+}
+
+static void
+clear_sd_overutilized(struct sched_domain *sd)
+{
+ if (sd)
+ sd->shared->overutilized = false;
+}
+
+
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
@@ -4744,6 +4768,7 @@ static void
enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
+ struct sched_domain *sd;
struct sched_entity *se = &p->se;
int task_new = !(flags & ENQUEUE_WAKEUP);
@@ -4787,9 +4812,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!se) {
add_nr_running(rq, 1);
- if (!task_new && !rq->rd->overutilized &&
- cpu_overutilized(rq->cpu))
- rq->rd->overutilized = true;
+ rcu_read_lock();
+ sd = rcu_dereference(rq->sd);
+ if (!task_new && !is_sd_overutilized(sd) &&
+ cpu_overutilized(rq->cpu))
+ set_sd_overutilized(sd);
+ rcu_read_unlock();
}
hrtick_update(rq);
}
@@ -6173,8 +6201,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu)
unsigned long max_spare = 0;
struct sched_domain *sd;
- rcu_read_lock();
-
+ /* The rcu read lock must be held by the caller */
sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
if (!sd)
@@ -6212,8 +6239,6 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu)
}
unlock:
- rcu_read_unlock();
-
if (energy_cpu == prev_cpu && !cpu_overutilized(prev_cpu))
return prev_cpu;
@@ -6247,10 +6272,16 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
&& cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
}
- if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized))
- return select_energy_cpu_brute(p, prev_cpu);
-
rcu_read_lock();
+ sd = rcu_dereference(cpu_rq(prev_cpu)->sd);
+ if (energy_aware() &&
+ !is_sd_overutilized(sd)) {
+ new_cpu = select_energy_cpu_brute(p, prev_cpu);
+ goto unlock;
+ }
+
+ sd = NULL;
+
for_each_domain(cpu, tmp) {
if (!(tmp->flags & SD_LOAD_BALANCE))
break;
@@ -6315,6 +6346,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
}
/* while loop will break here if sd == NULL */
}
+
+unlock:
rcu_read_unlock();
return new_cpu;
@@ -7366,6 +7399,7 @@ struct sd_lb_stats {
struct sched_group *local; /* Local group in this sd */
unsigned long total_load; /* Total load of all groups in sd */
unsigned long total_capacity; /* Total capacity of all groups in sd */
+ unsigned long total_util; /* Total util of all groups in sd */
unsigned long avg_load; /* Average load across all groups in sd */
struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
@@ -7385,6 +7419,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
.local = NULL,
.total_load = 0UL,
.total_capacity = 0UL,
+ .total_util = 0UL,
.busiest_stat = {
.avg_load = 0UL,
.sum_nr_running = 0,
@@ -7664,7 +7699,7 @@ group_type group_classify(struct sched_group *group,
static inline void update_sg_lb_stats(struct lb_env *env,
struct sched_group *group, int load_idx,
int local_group, struct sg_lb_stats *sgs,
- bool *overload, bool *overutilized)
+ bool *overload, bool *overutilized, bool *misfit_task)
{
unsigned long load;
int i, nr_running;
@@ -7699,8 +7734,16 @@ static inline void update_sg_lb_stats(struct lb_env *env,
if (!nr_running && idle_cpu(i))
sgs->idle_cpus++;
- if (cpu_overutilized(i))
+ if (cpu_overutilized(i)) {
*overutilized = true;
+ /*
+ * If the cpu is overutilized and if there is only one
+ * current task in cfs runqueue, it is potentially a misfit
+ * task.
+ */
+ if (rq->cfs.h_nr_running == 1)
+ *misfit_task = true;
+ }
}
/* Adjust by relative CPU capacity of the group */
@@ -7829,7 +7872,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
struct sched_group *sg = env->sd->groups;
struct sg_lb_stats tmp_sgs;
int load_idx, prefer_sibling = 0;
- bool overload = false, overutilized = false;
+ bool overload = false, overutilized = false, misfit_task = false;
if (child && child->flags & SD_PREFER_SIBLING)
prefer_sibling = 1;
@@ -7851,7 +7894,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
}
update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
- &overload, &overutilized);
+ &overload, &overutilized,
+ &misfit_task);
if (local_group)
goto next_group;
@@ -7882,6 +7926,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
/* Now, start updating sd_lb_stats */
sds->total_load += sgs->group_load;
sds->total_capacity += sgs->group_capacity;
+ sds->total_util += sgs->group_util;
sg = sg->next;
} while (sg != env->sd->groups);
@@ -7895,14 +7940,27 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
/* update overload indicator if we are at root domain */
if (env->dst_rq->rd->overload != overload)
env->dst_rq->rd->overload = overload;
-
- /* Update over-utilization (tipping point, U >= 0) indicator */
- if (env->dst_rq->rd->overutilized != overutilized)
- env->dst_rq->rd->overutilized = overutilized;
- } else {
- if (!env->dst_rq->rd->overutilized && overutilized)
- env->dst_rq->rd->overutilized = true;
}
+
+ if (overutilized)
+ set_sd_overutilized(env->sd);
+ else
+ clear_sd_overutilized(env->sd);
+
+ /*
+ * If there is a misfit task on one CPU in this sched_domain,
+ * it is likely that the imbalance cannot be sorted out among
+ * the CPUs in this sched_domain. In this case set the
+ * overutilized flag at the parent sched_domain.
+ */
+ if (misfit_task)
+ set_sd_overutilized(env->sd->parent);
+
+ /* If the domain util is greater than the domain capacity, load balancing
+ * needs to be done at the next sched domain level as well
+ */
+ if (sds->total_capacity * 1024 < sds->total_util * capacity_margin)
+ set_sd_overutilized(env->sd->parent);
}
/**
@@ -8122,8 +8180,10 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
*/
update_sd_lb_stats(env, &sds);
- if (energy_aware() && !env->dst_rq->rd->overutilized)
- goto out_balanced;
+ if (energy_aware()) {
+ if (!is_sd_overutilized(env->sd))
+ goto out_balanced;
+ }
local = &sds.local_stat;
busiest = &sds.busiest_stat;
@@ -8981,6 +9041,11 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
rcu_read_lock();
for_each_domain(cpu, sd) {
+ if (energy_aware()) {
+ if (!is_sd_overutilized(sd))
+ continue;
+ }
+
/*
* Decay the newidle max times here because this is a regular
* visit to all the domains. Decay ~1% per second.
@@ -9280,6 +9345,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &curr->se;
+ struct sched_domain *sd;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
@@ -9289,8 +9355,12 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
if (static_branch_unlikely(&sched_numa_balancing))
task_tick_numa(rq, curr);
- if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr)))
- rq->rd->overutilized = true;
+ rcu_read_lock();
+ sd = rcu_dereference(rq->sd);
+ if (!is_sd_overutilized(sd) &&
+ cpu_overutilized(task_cpu(curr)))
+ set_sd_overutilized(sd);
+ rcu_read_unlock();
}
/*
--
2.1.4
This patch series improves load balancing behaviour for misfit tasks.
The current code introduces the type 'group_misfit_task' to indicate
that a sched group has a misfit task, but before the misfit task can
actually be migrated onto a higher-capacity CPU there are still some
barriers we need to clear up.
The first patch corrects task_fits_max() so it can properly filter out
misfit tasks on low-capacity CPUs. Without this patch, it is possible
for the function to always return true, so the misfit-task mechanism is
never triggered.
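(For context, the margin-based fit test these kernels build on looks
roughly like the sketch below. This is illustrative only and is not the
actual task_fits_max() change from patch 1; the helper name and the
capacity_margin value of 1280 are assumptions matching the usual
default.)

/*
 * Illustrative only: a task "fits" a CPU when its utilization, scaled
 * by capacity_margin (1280 -> ~80%), stays within the CPU capacity.
 */
static inline bool fits_capacity(unsigned long util, unsigned long capacity)
{
	return capacity * 1024 > util * capacity_margin;
}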
The second patch fixes group_smaller_cpu_capacity(), so that a sched
group of type 'group_misfit_task' is not wrongly rolled back to type
'group_other', which would cause all misfit-related information to be
abandoned.
The third patch fixes nr_running accounting. Without it, the scheduler
wrongly considers the destination CPU to have a running task and skips
migrating a task onto it. This patch reports that the destination CPU
has no running task when it is going into the idle state, so the misfit
task can be migrated during this idle balance.
The fourth patch is a temporary patch for the case where Vincent's
series "sched: reflect sched_entity move into task_group's load" [1]
has not been backported. Without that series, it is possible for a CPU
that is not overutilized to have a misfit task enqueued on it, so we
set sgs->group_misfit_task by checking rq->misfit_task rather than
relying on whether the CPU is overutilized.
The fifth patch selects as busiest the rq that has a misfit task,
giving such an rq higher priority than the rq with the highest weighted
load. This criterion is only enabled for energy-aware scheduling.
The sixth patch aggressively kicks active load balancing for a misfit
task, so there is a good chance for a higher-capacity CPU to
immediately pull the misfit task.
[1] https://lkml.org/lkml/2016/10/17/223
Leo Yan (6):
sched/fair: correct task_fits_max() for misfit task
sched/fair: fix for group_smaller_cpu_capacity()
sched/fair: fix nr_running accounting for new idle CPU
sched/fair: fix to set sgs->group_misfit_task
sched/fair: select busiest rq with misfit task
sched/fair: kick active load balance for misfit task
kernel/sched/fair.c | 59 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 46 insertions(+), 13 deletions(-)
--
2.7.4
Hi Guys,
All of this work was done by Steve before he left. I have made very
minor changes, merged a few patches, and rebased over 4.10-rc5.
More details can be found here:
https://projects.linaro.org/browse/PMWG-1018
With Android UI and benchmarks the latency of cpufreq response to
certain scheduling events can become very critical. Currently on
mainline tip, callbacks into schedutil are only made from the scheduler
if the target CPU of the event is the same as the current CPU. This
means there are certain situations where a target CPU may not run
schedutil for some time.
One testcase to show this behavior is where a task starts running on
CPU0, then a new task is also spawned on CPU0 by a task on CPU1. If the
system is configured such that new tasks should receive maximum demand
initially, this should result in CPU0 increasing frequency immediately.
Because of the above-mentioned limitation, though, this does not occur.
This patchset defers the callback into schedutil if the callback would
be remote (i.e. not for a CPU in the policy of the CPU we are running
on). If no preemption is required by the wakeup, a late callback into
schedutil is made, and schedutil is modified to be able to correctly
deal with remote callbacks. If preemption does occur, then the
scheduler, and schedutil, will run on the remote CPU anyway.
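In rough pseudo-code, the decision described above is something like
the sketch below. The helper names here are hypothetical and only
illustrate the flow; the actual hooks are spread across the patches
listed at the end.

/* Illustrative sketch only; all helpers below are hypothetical. */
static void cpufreq_hook(int target_cpu, bool wakeup_preempts)
{
	if (cpu_in_local_policy(target_cpu)) {
		/* Local case: invoke the schedutil callback right away. */
		run_schedutil_callback(target_cpu);
	} else if (!wakeup_preempts) {
		/* Remote, no preemption: defer to a late callback. */
		queue_late_schedutil_callback(target_cpu);
	}
	/*
	 * Remote with preemption: the scheduler (and schedutil) will run
	 * on the remote CPU shortly anyway, so nothing extra is needed.
	 */
}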
I will be doing further testing on this to get more performance
numbers; I just wanted to get some early responses, so I am sending it
to the EAS list.
--
viresh
Steve Muckle (9):
sched: cpufreq: add cpu to update_util_data
irq_work: add irq_work_queue_on for !CONFIG_SMP
sched: cpufreq: extend irq work to support fast switches
sched: cpufreq: remove smp_processor_id() in remote paths
sched: create late cpufreq callback
sched: cpufreq: detect, process remote callbacks
cpufreq: governor: support scheduler cpufreq callbacks on remote CPUs
intel_pstate: ignore scheduler cpufreq callbacks on remote CPUs
sched: cpufreq: enable remote sched cpufreq callbacks
drivers/cpufreq/cpufreq_governor.c | 2 +-
drivers/cpufreq/intel_pstate.c | 3 ++
include/linux/irq_work.h | 7 ++++
include/linux/sched.h | 1 +
kernel/sched/core.c | 4 ++
kernel/sched/cpufreq.c | 1 +
kernel/sched/cpufreq_schedutil.c | 80 +++++++++++++++++++++++++++-----------
kernel/sched/fair.c | 6 ++-
kernel/sched/sched.h | 24 +++++++++++-
9 files changed, 102 insertions(+), 26 deletions(-)
--
2.7.1.410.g6faf27b
The current implementation of overutilization aborts energy-aware
scheduling if any CPU in the system is over-utilized. This patch
introduces an over-utilization flag per sched-group level instead of a
single system-wide flag. Load balancing is done at a sched domain if
any of its sched groups is over-utilized. If energy-aware scheduling is
enabled and no sched group in a sched domain is over-utilized, load
balancing is skipped for that sched domain and energy-aware scheduling
continues at that level.
The implementation is based on two points:
1. For every CPU, in every sched domain, the first group is the group
   that contains the CPU itself.
2. Sched groups are shared between CPUs.
Thus, if a sched group is overutilized, the overutilized flag is set at
the first sched group of the parent sched domain. This ensures load
balancing at the over-utilized sched-domain level.
For example, consider a big.LITTLE system with two little CPUs (CPU A
and CPU B) and two big CPUs (CPU C and CPU D). In this system, the
hierarchy is as follows:
CPU A
  SD level 1 - SG1 (CPU A), SG2 (CPU B)
  SD level 2 - SG5 (CPU A, CPU B), SG6 (CPU C, CPU D)
  RD
CPU B
  SD level 1 - SG2 (CPU B), SG1 (CPU A)
  SD level 2 - SG5 (CPU A, CPU B), SG6 (CPU C, CPU D)
  RD
CPU C
  SD level 1 - SG3 (CPU C), SG4 (CPU D)
  SD level 2 - SG6 (CPU C, CPU D), SG5 (CPU A, CPU B)
  RD
CPU D
  SD level 1 - SG4 (CPU D), SG3 (CPU C)
  SD level 2 - SG6 (CPU C, CPU D), SG5 (CPU A, CPU B)
  RD
In the above system, if CPU A is overutilized, the overutilized flag is
set at SG5 (the first sched group of the parent sched domain).
Similarly, if CPU B is overutilized, the flag is set at SG5. During
load balancing at SD level 1, the overutilized flag is checked at the
first sched group of the parent sched domain (SG5). If there is no
parent sched domain, the flag is set and checked at the root domain
instead. This ensures that load balancing happens irrespective of which
CPU in a sched domain is over-utilized.
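To make the flag placement in this example concrete, the helpers added
below boil down to the following pattern (repeated here only for
illustration):

	/*
	 * For CPU A at SD level 1, sd->parent is the level-2 domain and
	 * sd->parent->groups is SG5, which is shared with CPU B, so a
	 * single store marks the whole little cluster. At the top level
	 * there is no parent, so the root_domain flag is used instead.
	 */
	if (sd && sd->parent)
		sd->parent->groups->overutilized = true;
	else if (rd)
		rd->overutilized = true;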
Signed-off-by: Thara Gopinath <thara.gopinath(a)linaro.org>
---
kernel/sched/fair.c | 108 ++++++++++++++++++++++++++++++++++++++++++---------
kernel/sched/sched.h | 1 +
2 files changed, 90 insertions(+), 19 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 01fa969..0c97e0a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4559,6 +4559,36 @@ static inline void hrtick_update(struct rq *rq)
static bool cpu_overutilized(int cpu);
+static bool
+is_sd_overutilized(struct sched_domain *sd, struct root_domain *rd)
+{
+ if (sd && sd->parent)
+ return sd->parent->groups->overutilized;
+
+ if (!rd)
+ return false;
+
+ return rd->overutilized;
+}
+
+static void
+set_sd_overutilized(struct sched_domain *sd, struct root_domain *rd)
+{
+ if (sd && sd->parent)
+ sd->parent->groups->overutilized = true;
+ else if (rd)
+ rd->overutilized = true;
+}
+
+static void
+clear_sd_overutilized(struct sched_domain *sd, struct root_domain *rd)
+{
+ if (sd && sd->parent)
+ sd->parent->groups->overutilized = false;
+ else if (rd)
+ rd->overutilized = false;
+}
+
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
@@ -4568,6 +4598,7 @@ static void
enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
+ struct sched_domain *sd;
struct sched_entity *se = &p->se;
int task_new = !(flags & ENQUEUE_WAKEUP);
@@ -4603,9 +4634,12 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!se) {
add_nr_running(rq, 1);
- if (!task_new && !rq->rd->overutilized &&
- cpu_overutilized(rq->cpu))
- rq->rd->overutilized = true;
+ rcu_read_lock();
+ sd = rcu_dereference(rq->sd);
+ if (!task_new && !is_sd_overutilized(sd, rq->rd) &&
+ cpu_overutilized(rq->cpu))
+ set_sd_overutilized(sd, rq->rd);
+ rcu_read_unlock();
}
hrtick_update(rq);
}
@@ -5989,8 +6023,6 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu)
unsigned long max_spare = 0;
struct sched_domain *sd;
- rcu_read_lock();
-
sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
if (!sd)
@@ -6028,7 +6060,6 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu)
}
unlock:
- rcu_read_unlock();
if (energy_cpu == prev_cpu && !cpu_overutilized(prev_cpu))
return prev_cpu;
@@ -6063,10 +6094,16 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
&& cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
}
- if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized))
- return select_energy_cpu_brute(p, prev_cpu);
-
rcu_read_lock();
+ sd = rcu_dereference(cpu_rq(prev_cpu)->sd);
+ if (energy_aware() &&
+ !is_sd_overutilized(sd,
+ cpu_rq(cpu)->rd)) {
+ new_cpu = select_energy_cpu_brute(p, prev_cpu);
+ goto unlock;
+ }
+
+ sd = NULL;
for_each_domain(cpu, tmp) {
if (!(tmp->flags & SD_LOAD_BALANCE))
break;
@@ -6131,6 +6168,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
}
/* while loop will break here if sd == NULL */
}
+
+unlock:
rcu_read_unlock();
return new_cpu;
@@ -7178,6 +7217,7 @@ struct sd_lb_stats {
struct sched_group *local; /* Local group in this sd */
unsigned long total_load; /* Total load of all groups in sd */
unsigned long total_capacity; /* Total capacity of all groups in sd */
+ unsigned long total_util; /* Total util of all groups in sd */
unsigned long avg_load; /* Average load across all groups in sd */
struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
@@ -7197,6 +7237,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
.local = NULL,
.total_load = 0UL,
.total_capacity = 0UL,
+ .total_util = 0UL,
.busiest_stat = {
.avg_load = 0UL,
.sum_nr_running = 0,
@@ -7692,6 +7733,7 @@ next_group:
/* Now, start updating sd_lb_stats */
sds->total_load += sgs->group_load;
sds->total_capacity += sgs->group_capacity;
+ sds->total_util += sgs->group_util;
sg = sg->next;
} while (sg != env->sd->groups);
@@ -7701,17 +7743,26 @@ next_group:
env->src_grp_nr_running = sds->busiest_stat.sum_nr_running;
+ /* Setting overutilized flag might not be necessary here
+ * Revisit
+ */
if (!lb_sd_parent(env->sd)) {
/* update overload indicator if we are at root domain */
if (env->dst_rq->rd->overload != overload)
env->dst_rq->rd->overload = overload;
+ }
- /* Update over-utilization (tipping point, U >= 0) indicator */
- if (env->dst_rq->rd->overutilized != overutilized)
- env->dst_rq->rd->overutilized = overutilized;
- } else {
- if (!env->dst_rq->rd->overutilized && overutilized)
- env->dst_rq->rd->overutilized = true;
+ if (overutilized)
+ set_sd_overutilized(env->sd, env->dst_rq->rd);
+
+ /* If the domain util is greater than the domain capacity, load balancing
+ * needs to be done at the next sched domain level as well
+ */
+ if (sds->total_capacity * 1024 < sds->total_util * capacity_margin) {
+ /* If already at the highest domain nothing can be done */
+ if (env->sd->parent)
+ set_sd_overutilized(env->sd->parent,
+ env->dst_rq->rd);
}
}
@@ -7932,8 +7983,11 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
*/
update_sd_lb_stats(env, &sds);
- if (energy_aware() && !env->dst_rq->rd->overutilized)
- goto out_balanced;
+ /* Is this check really required here?? Revisit */
+ if (energy_aware()) {
+ if (!is_sd_overutilized(env->sd, env->dst_rq->rd))
+ goto out_balanced;
+ }
local = &sds.local_stat;
busiest = &sds.busiest_stat;
@@ -8000,6 +8054,12 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
force_balance:
/* Looks like there is an imbalance. Compute it */
calculate_imbalance(env, &sds);
+
+ /* Is this the correct place to clear this flag? Should access
+ * to flag be locked? Revisit.
+ */
+ clear_sd_overutilized(env->sd, env->dst_rq->rd);
+
return sds.busiest;
out_balanced:
@@ -8790,6 +8850,11 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
rcu_read_lock();
for_each_domain(cpu, sd) {
+ if (energy_aware()) {
+ if (!is_sd_overutilized(sd, rq->rd))
+ continue;
+ }
+
/*
* Decay the newidle max times here because this is a regular
* visit to all the domains. Decay ~1% per second.
@@ -9083,6 +9148,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &curr->se;
+ struct sched_domain *sd;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
@@ -9092,8 +9158,12 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
if (static_branch_unlikely(&sched_numa_balancing))
task_tick_numa(rq, curr);
- if (!rq->rd->overutilized && cpu_overutilized(task_cpu(curr)))
- rq->rd->overutilized = true;
+ rcu_read_lock();
+ sd = rcu_dereference(rq->sd);
+ if (!is_sd_overutilized(sd, rq->rd) &&
+ cpu_overutilized(task_cpu(curr)))
+ set_sd_overutilized(sd, rq->rd);
+ rcu_read_unlock();
}
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f99391d..90c48ac 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -913,6 +913,7 @@ struct sched_group {
unsigned int group_weight;
struct sched_group_capacity *sgc;
const struct sched_group_energy const *sge;
+ bool overutilized;
/*
* The CPUs this group covers.
--
2.1.4