From c9dfdeb5b9f38e94eca3c489091314a4e82f4864 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 10 Dec 2015 10:41:39 +0800
Subject: [PATCH] sched/fair: EASv5: Spread Tasks With Lower OPP

With this patch, we select the best CPU from every sched group
according to the following priority:

- Prefer the CPU that meets the capacity requirement at the lowest OPP
- On an OPP tie, prefer the CPU with the highest utilization
- On a utilization tie, prefer the CPU with the lowest CPU ID

After these per-group selections, compare the candidate CPUs and
select the best one based on the energy data.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
---
 kernel/sched/fair.c | 157 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 104 insertions(+), 53 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ce293ff..127a354 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5038,6 +5038,9 @@ static int find_new_capacity(struct energy_env *eenv,
 		}
 	}

+	if (idx == sge->nr_cap_states)
+		idx = idx - 1;
+
 	eenv->cap_idx = idx;
 	return idx;
 }
@@ -5557,87 +5560,135 @@ done:
 	return target;
 }

-static int energy_aware_wake_cpu(struct task_struct *p, int target)
+static int find_cpu_new_capacity(int cpu, unsigned long util)
 {
 	struct sched_domain *sd;
-	struct sched_group *sg, *sg_target;
-	int target_max_cap = INT_MAX;
-	int target_cpu = task_cpu(p);
-	int i;
+	struct sched_group_energy *sge;
+	int idx;

-	sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
+	sd = rcu_dereference(per_cpu(sd_ea, cpu));
+	sge = sd->groups->sge;

-	if (!sd)
-		return target;
+	for (idx = 0; idx < sge->nr_cap_states; idx++)
+		if (sge->cap_states[idx].cap >= util)
+			break;

-	sg = sd->groups;
-	sg_target = sg;
+	if (idx == sge->nr_cap_states)
+		idx = idx - 1;

-	/*
-	 * Find group with sufficient capacity. We only get here if no cpu is
-	 * overutilized. We may end up overutilizing a cpu by adding the task,
-	 * but that should not be any worse than select_idle_sibling().
-	 * load_balance() should sort it out later as we get above the tipping
-	 * point.
-	 */
-	do {
-		/* Assuming all cpus are the same in group */
-		int max_cap_cpu = group_first_cpu(sg);
+	return idx;
+}

-		/*
-		 * Assume smaller max capacity means more energy-efficient.
-		 * Ideally we should query the energy model for the right
-		 * answer but it easily ends up in an exhaustive search.
-		 */
-		if (capacity_of(max_cap_cpu) < target_max_cap &&
-		    task_fits_capacity(p, max_cap_cpu)) {
-			sg_target = sg;
-			target_max_cap = capacity_of(max_cap_cpu);
-		}
-	} while (sg = sg->next, sg != sd->groups);
+static void find_best_cpu_in_sg(struct cpumask *mask, struct sched_group *sg,
+			       struct task_struct *p)
+{
+	int min_opp = INT_MAX, max_usage = 0, new_usage;
+	int target_cpu = -1, i;
+
+	for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
+
+		int opp;

-	/* Find cpu with sufficient capacity */
-	for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) {
 		/*
 		 * p's blocked utilization is still accounted for on prev_cpu
 		 * so prev_cpu will receive a negative bias due the double
 		 * accouting. However, the blocked utilization may be zero.
 		 */
-		int new_usage = get_cpu_usage(i) + task_utilization(p);
+		new_usage = get_cpu_usage(i) + task_utilization(p);

-		if (new_usage >	capacity_orig_of(i))
+		opp = find_cpu_new_capacity(i, new_usage);
+
+		/* Needs a higher OPP than the best candidate so far: skip */
+		if (min_opp < opp)
 			continue;

-		if (new_usage <	capacity_curr_of(i)) {
+		/* CPU fits at a lower OPP: take it as the new candidate */
+		if (min_opp > opp) {
+			min_opp = opp;
+			max_usage = new_usage;
 			target_cpu = i;
-			if (cpu_rq(i)->nr_running)
-				break;
+			continue;
 		}

-		/* cpu has capacity at higher OPP, keep it as fallback */
-		if (target_cpu == task_cpu(p))
+		if (max_usage < new_usage) {
+			max_usage = new_usage;
 			target_cpu = i;
-	}
+			continue;
+		}

-	if (target_cpu != task_cpu(p)) {
-		struct energy_env eenv = {
-			.usage_delta	= task_utilization(p),
-			.src_cpu	= task_cpu(p),
-			.dst_cpu	= target_cpu,
-			.task		= p,
-		};
+		if (i < target_cpu) {
+			target_cpu = i;
+			continue;
+		}
+ 	}

-		/* Not enough spare capacity on previous cpu */
-		if (cpu_overutilized(task_cpu(p)))
-			return target_cpu;
+	BUG_ON(target_cpu == -1);

-		if (energy_diff(&eenv) >= 0)
-			return task_cpu(p);
+	cpumask_set_cpu(target_cpu, mask);
+	return;
+}
+
+static int find_power_efficient_cpu(struct cpumask *mask, struct task_struct *p)
+{
+	int i, target_cpu;
+	int min_energy = 0, diff;
+	struct energy_env eenv;
+
+	target_cpu = task_cpu(p);
+
+	for_each_cpu(i, mask) {
+
+		if (i == task_cpu(p))
+			continue;
+
+		memset(&eenv, 0, sizeof(eenv));
+
+		eenv.usage_delta = task_utilization(p),
+		eenv.src_cpu	 = task_cpu(p),
+		eenv.dst_cpu	 = i,
+		eenv.task	 = p,
+
+		diff = energy_diff(&eenv);
+		if (diff < min_energy) {
+			target_cpu = i;
+			min_energy = diff;
+		}
 	}

 	return target_cpu;
 }

+
+static int energy_aware_wake_cpu(struct task_struct *p, int target)
+{
+	struct sched_domain *sd;
+	struct sched_group *sg, *sg_target;
+	int target_cpu;
+	struct cpumask target_cpus;
+
+	sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
+
+	if (!sd)
+		return target;
+
+	sg = sd->groups;
+	sg_target = sg;
+
+	cpumask_clear(&target_cpus);
+
+	do {
+		find_best_cpu_in_sg(&target_cpus, sg, p);
+
+	} while (sg = sg->next, sg != sd->groups);
+
+	if (cpumask_empty(&target_cpus))
+		cpumask_set_cpu(task_cpu(p), &target_cpus);
+
+	target_cpu = find_power_efficient_cpu(&target_cpus, p);
+
+	return target_cpu;
+}
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
--
1.9.1

