Hi all,
On Thu, Dec 10, 2015 at 10:53:06AM +0800, Leo Yan wrote:
On Wed, Nov 25, 2015 at 10:41:32AM -0800, Steve Muckle wrote:
On 11/24/2015 07:55 PM, Leo Yan wrote:
[...]
Let's say we are placing a small task on a big.Little system, and that small task could fit on both the big and Little cluster.
Does the above statement imply that we would not evaluate the best CPU in the big cluster? I'd think we should, in addition to the best CPU in the little cluster, and decide between those two options. This is because we can have cases where the big cluster is actually the most efficient place to run a task due to current task loads and the OPP of the little cluster.
- Select CPUs with lowest OPP to meet capacity requirement;
- Select CPUs with highest utilization (as you said, here we need to try to use the least number of CPUs, and I think it's more suitable for rt-app cases; even rt-app-6 will take 35% of a CPU's utilization when the CPU runs at the lowest OPP);
- Select CPUs with least CPU ID;
If you think there is no obvious logic error here, I will try it in the next 1~2 weeks and post results after finishing the related testing.
Could you post your draft changes here prior to testing? It'll help ensure I'm following your proposal correctly.
Below are the code with our discussion, please help review; I also enclosed the patch in case you want to check with diff format.
---<8---
/*
 * find_cpu_new_capacity - find the index of the lowest capacity state (OPP)
 * of @cpu's energy-aware sched domain that can accommodate @util.
 *
 * Walks the group's capacity states in ascending order and returns the index
 * of the first state whose capacity is >= @util.  If even the highest state
 * cannot satisfy @util, the highest index is returned as a best-effort
 * fallback.
 *
 * NOTE(review): must be called with rcu_read_lock() held (sd_ea is
 * RCU-protected) - confirm all callers hold it.
 */
static int find_cpu_new_capacity(int cpu, unsigned long util)
{
	struct sched_domain *sd;
	struct sched_group_energy *sge;
	int idx;

	sd = rcu_dereference(per_cpu(sd_ea, cpu));

	/*
	 * Fix: the original dereferenced sd->groups->sge unconditionally;
	 * sd_ea may be NULL for a CPU (the caller only checks task_cpu(p)'s
	 * domain, not @cpu's).  Report the lowest OPP as a safe fallback.
	 */
	if (!sd || !sd->groups->sge)
		return 0;

	sge = sd->groups->sge;

	for (idx = 0; idx < sge->nr_cap_states; idx++)
		if (sge->cap_states[idx].cap >= util)
			break;

	/* @util exceeds the highest capacity state: clamp to the last index */
	if (idx == sge->nr_cap_states)
		idx = idx - 1;

	return idx;
}
static void find_best_cpu_in_sg(struct cpumask *mask, struct sched_group *sg, struct task_struct *p) { int min_opp = INT_MAX, max_usage = 0, new_usage; int target_cpu = -1, i;
for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
int opp; /* * p's blocked utilization is still accounted for on prev_cpu * so prev_cpu will receive a negative bias due the double * accouting. However, the blocked utilization may be zero. */ new_usage = get_cpu_usage(i) + task_utilization(p);
When I continue to profile with this patch, I found I cannot get the expected result; the tasks will be migrated in a mess after applying this patch.
The target CPU's selection is quite dependent on the CPU's utilization, but from the ftrace data file, the cfs_rq::utilization_load_avg will be increased sharply. Then finally it will impact the CPU migration.
So in [2], we can see the task has even been migrated onto CPU2 with the energy calculation, but it will finally be migrated onto CPU3 because CPU2's utilization value increases sharply and meets the condition of cpu_overutilized().
It makes sense for the CPU's utilization to decay, but it should increase step-wise when the CPU is running tasks. So I want to confirm whether this is the expected behavior for CPU utilization, which increases sharply when enqueuing one task on the CPU's rq?
I saw there has been a lot of polishing of CPU and task load tracking recently, so do you think this issue has been fixed in a newer kernel (I'm using 4.2-rc6)?
Welcome any comment and suggestion.
Thanks, Leo Yan
[1] http://people.linaro.org/~leo.yan/eas_profiling/eas_cpu_utilization_increase... [2] http://people.linaro.org/~leo.yan/eas_profiling/eas_task_migrate_with_high_c...
opp = find_cpu_new_capacity(i, new_usage); /* If need higher OPP, then skip */ if (min_opp < opp) continue; /* If CPU with lowwer OPP, just use it */ if (min_opp > opp) { min_opp = opp; max_usage = new_usage; target_cpu = i; continue; } if (max_usage < new_usage) { max_usage = new_usage; target_cpu = i; continue; } if (i < target_cpu) { target_cpu = i; continue; }
}
BUG_ON(target_cpu == -1);
cpumask_set_cpu(target_cpu, mask); return; }
/*
 * find_power_efficient_cpu - pick the CPU from @mask to which moving @p
 * yields the largest energy saving according to energy_diff().
 *
 * The task's current CPU is the default answer; a candidate replaces it only
 * when its energy delta is strictly negative (a saving) and beats the best
 * delta seen so far.  Returns the selected CPU id.
 */
static int find_power_efficient_cpu(struct cpumask *mask, struct task_struct *p)
{
	int best_cpu = task_cpu(p);
	int best_delta = 0;
	int cpu;

	for_each_cpu(cpu, mask) {
		struct energy_env eenv;
		int delta;

		/* Moving the task onto its own CPU is a no-op: skip it */
		if (cpu == task_cpu(p))
			continue;

		memset(&eenv, 0, sizeof(eenv));
		eenv.usage_delta = task_utilization(p);
		eenv.src_cpu = task_cpu(p);
		eenv.dst_cpu = cpu;
		eenv.task = p;

		delta = energy_diff(&eenv);
		if (delta < best_delta) {
			best_delta = delta;
			best_cpu = cpu;
		}
	}

	return best_cpu;
}
static int energy_aware_wake_cpu(struct task_struct *p, int target) { struct sched_domain *sd; struct sched_group *sg, *sg_target; int target_cpu; struct cpumask target_cpus;
sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
if (!sd) return target;
sg = sd->groups; sg_target = sg;
cpumask_clear(&target_cpus);
do { find_best_cpu_in_sg(&target_cpus, sg, p);
} while (sg = sg->next, sg != sd->groups);
if (cpumask_empty(&target_cpus)) cpumask_set_cpu(task_cpu(p), &target_cpus);
target_cpu = find_power_efficient_cpu(&target_cpus, p);
return target_cpu; }
--->8---
Thanks, Leo Yan
From c9dfdeb5b9f38e94eca3c489091314a4e82f4864 Mon Sep 17 00:00:00 2001 From: Leo Yan leo.yan@linaro.org Date: Thu, 10 Dec 2015 10:41:39 +0800 Subject: [PATCH] sched/fair: EASv5: Spread Tasks With Lower OPP
With this patch, we will select best CPU from every sched group with below priority:
- Select CPUs with lowest OPP to meet capacity requirement
- Select CPUs with highest utilization
- Select CPUs with least CPU ID
After the selections, we then need to compare these candidate CPUs and select the best CPU from the energy data.
Signed-off-by: Leo Yan leo.yan@linaro.org
kernel/sched/fair.c | 157 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 104 insertions(+), 53 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ce293ff..127a354 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5038,6 +5038,9 @@ static int find_new_capacity(struct energy_env *eenv, } }
- if (idx == sge->nr_cap_states)
idx = idx - 1;
- eenv->cap_idx = idx; return idx;
} @@ -5557,87 +5560,135 @@ done: return target; }
-static int energy_aware_wake_cpu(struct task_struct *p, int target) +static int find_cpu_new_capacity(int cpu, unsigned long util) { struct sched_domain *sd;
- struct sched_group *sg, *sg_target;
- int target_max_cap = INT_MAX;
- int target_cpu = task_cpu(p);
- int i;
- struct sched_group_energy *sge;
- int idx;
- sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
- sd = rcu_dereference(per_cpu(sd_ea, cpu));
- sge = sd->groups->sge;
- if (!sd)
return target;
- for (idx = 0; idx < sge->nr_cap_states; idx++)
if (sge->cap_states[idx].cap >= util)
break;
- sg = sd->groups;
- sg_target = sg;
- if (idx == sge->nr_cap_states)
idx = idx - 1;
- /*
* Find group with sufficient capacity. We only get here if no cpu is
* overutilized. We may end up overutilizing a cpu by adding the task,
* but that should not be any worse than select_idle_sibling().
* load_balance() should sort it out later as we get above the tipping
* point.
*/
- do {
/* Assuming all cpus are the same in group */
int max_cap_cpu = group_first_cpu(sg);
- return idx;
+}
/*
* Assume smaller max capacity means more energy-efficient.
* Ideally we should query the energy model for the right
* answer but it easily ends up in an exhaustive search.
*/
if (capacity_of(max_cap_cpu) < target_max_cap &&
task_fits_capacity(p, max_cap_cpu)) {
sg_target = sg;
target_max_cap = capacity_of(max_cap_cpu);
}
- } while (sg = sg->next, sg != sd->groups);
+static void find_best_cpu_in_sg(struct cpumask *mask, struct sched_group *sg,
struct task_struct *p)
+{
- int min_opp = INT_MAX, max_usage = 0, new_usage;
- int target_cpu = -1, i;
- for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
int opp;
- /* Find cpu with sufficient capacity */
- for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg_target)) { /*
*/
- p's blocked utilization is still accounted for on prev_cpu
- so prev_cpu will receive a negative bias due the double
- accouting. However, the blocked utilization may be zero.
int new_usage = get_cpu_usage(i) + task_utilization(p);
new_usage = get_cpu_usage(i) + task_utilization(p);
if (new_usage > capacity_orig_of(i))
opp = find_cpu_new_capacity(i, new_usage);
/* If need higher OPP, then skip */
if (min_opp < opp) continue;
if (new_usage < capacity_curr_of(i)) {
/* If CPU with lowwer OPP, just use it */
if (min_opp > opp) {
min_opp = opp;
max_usage = new_usage; target_cpu = i;
if (cpu_rq(i)->nr_running)
break;
}continue;
/* cpu has capacity at higher OPP, keep it as fallback */
if (target_cpu == task_cpu(p))
if (max_usage < new_usage) {
max_usage = new_usage; target_cpu = i;
- }
continue;
}
- if (target_cpu != task_cpu(p)) {
struct energy_env eenv = {
.usage_delta = task_utilization(p),
.src_cpu = task_cpu(p),
.dst_cpu = target_cpu,
.task = p,
};
if (i < target_cpu) {
target_cpu = i;
continue;
}
- }
/* Not enough spare capacity on previous cpu */
if (cpu_overutilized(task_cpu(p)))
return target_cpu;
- BUG_ON(target_cpu == -1);
if (energy_diff(&eenv) >= 0)
return task_cpu(p);
- cpumask_set_cpu(target_cpu, mask);
- return;
+}
+static int find_power_efficient_cpu(struct cpumask *mask, struct task_struct *p) +{
int i, target_cpu;
int min_energy = 0, diff;
struct energy_env eenv;
target_cpu = task_cpu(p);
for_each_cpu(i, mask) {
if (i == task_cpu(p))
continue;
memset(&eenv, 0, sizeof(eenv));
eenv.usage_delta = task_utilization(p),
eenv.src_cpu = task_cpu(p),
eenv.dst_cpu = i,
eenv.task = p,
diff = energy_diff(&eenv);
if (diff < min_energy) {
target_cpu = i;
min_energy = diff;
}
}
return target_cpu;
}
+static int energy_aware_wake_cpu(struct task_struct *p, int target) +{
- struct sched_domain *sd;
- struct sched_group *sg, *sg_target;
- int target_cpu;
- struct cpumask target_cpus;
- sd = rcu_dereference(per_cpu(sd_ea, task_cpu(p)));
- if (!sd)
return target;
- sg = sd->groups;
- sg_target = sg;
- cpumask_clear(&target_cpus);
- do {
find_best_cpu_in_sg(&target_cpus, sg, p);
- } while (sg = sg->next, sg != sd->groups);
- if (cpumask_empty(&target_cpus))
cpumask_set_cpu(task_cpu(p), &target_cpus);
- target_cpu = find_power_efficient_cpu(&target_cpus, p);
- return target_cpu;
+}
/*
- select_task_rq_fair: Select target runqueue for the waking task in domains
- that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
-- 1.9.1