In the current code, tasks are packed onto one CPU while the system is under the tipping point. As a result it's possible that one CPU is very busy while the other CPUs in the same cluster stay idle, which causes a performance issue: below the tipping point there is no mechanism to spread tasks within the same cluster.
Relying on "over-utilized" as the tipping point for spreading tasks has two issues. First, "over-utilized" is a rigid condition: a CPU needs a long time to reach 80% of its capacity, which delays meeting the tasks' performance requirements. Second, once the tipping point is crossed, the scheduler migrates tasks directly to the big cluster rather than spreading them within the little cluster.
This patch adds "half-utilized" as an intermediate state: if a CPU is above 50% utilization it is considered "half-utilized", and the scheduler then tries to spread tasks within the same cluster; this applies to any sched domain (or cluster). Two condition checks need to change for this, one in the wakeup path and one in the idle balance path; once a CPU is "half-utilized", both paths try to spread tasks within the lowest sched domain of the cluster. The sketch below illustrates how the two thresholds compare.
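For illustration only (not part of the patch), here is a standalone userspace sketch of the two predicates with made-up numbers. It assumes capacity_margin keeps its default value of 1280 (roughly 80%) as in this tree; the helper names and example values are hypothetical.

/*
 * Illustrative sketch: compare the existing "over-utilized" test with the
 * new "half-utilized" test. Assumes capacity_margin == 1280 (~80%).
 */
#include <stdbool.h>
#include <stdio.h>

static const unsigned long capacity_margin = 1280;

/* Mirrors cpu_overutilized(): utilization above ~80% of capacity */
static bool overutilized(unsigned long capacity, unsigned long util)
{
	return capacity * 1024 < util * capacity_margin;
}

/* Mirrors cpu_halfutilized(): utilization above 50% of capacity */
static bool halfutilized(unsigned long capacity, unsigned long util)
{
	return capacity < util * 2;
}

int main(void)
{
	unsigned long capacity = 512, util = 300;	/* hypothetical little CPU */

	/*
	 * util is ~59% of capacity: not over-utilized (so no migration to
	 * the big cluster), but half-utilized, so with 2+ runnable tasks
	 * the scheduler would now spread within the little cluster.
	 */
	printf("overutilized=%d halfutilized=%d\n",
	       overutilized(capacity, util), halfutilized(capacity, util));

	return 0;
}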
Signed-off-by: Leo Yan <leo.yan@linaro.org>
---
 kernel/sched/fair.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 804e8c8..747d27d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4213,6 +4213,8 @@ static void update_capacity_of(int cpu)
 }
 
 static bool cpu_overutilized(int cpu);
+static bool cpu_halfutilized(int cpu);
+static bool need_spread_task(int cpu);
 
 /*
  * The enqueue_task method is called before nr_running is
@@ -5284,6 +5286,32 @@ static bool cpu_overutilized(int cpu)
 	return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
 }
 
+static bool cpu_halfutilized(int cpu)
+{
+	return capacity_of(cpu) < (cpu_util(cpu) * 2);
+}
+
+static bool need_spread_task(int cpu)
+{
+	struct sched_domain *sd;
+	int spread = 0, i;
+
+	sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
+
+	if (!sd)
+		return 0;
+
+	for_each_cpu(i, sched_domain_span(sd)) {
+		if (cpu_rq(cpu)->cfs.h_nr_running >= 2 &&
+		    cpu_halfutilized(i)) {
+			spread = 1;
+			break;
+		}
+	}
+
+	return spread;
+}
+
 #ifdef CONFIG_SCHED_TUNE
 
 static unsigned long
@@ -5733,7 +5761,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	}
 
 	if (!sd) {
-		if (energy_aware() && !cpu_rq(cpu)->rd->overutilized)
+		if (energy_aware() && !need_spread_task(cpu))
 			new_cpu = energy_aware_wake_cpu(p, prev_cpu);
 		else if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
 			new_cpu = select_idle_sibling(p, new_cpu);
@@ -7683,8 +7711,19 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	trace_sched_sd_lb_stats(sched_group_cpus(env->sd->groups), sds.total_load,
 				sds.total_capacity, sds.avg_load);
 
-	if (energy_aware() && !env->dst_rq->rd->overutilized)
-		goto out_balanced;
+	if (energy_aware() && !env->dst_rq->rd->overutilized) {
+
+		struct sched_domain *sd;
+		int cpu = env->dst_cpu;
+
+		sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
+		if (!cpumask_equal(sched_domain_span(sd),
+				   sched_domain_span(env->sd)))
+			goto out_balanced;
+
+		if (!need_spread_task(cpu))
+			goto out_balanced;
+	}
 
 	local = &sds.local_stat;
 	busiest = &sds.busiest_stat;
-- 
1.9.1