Hi Rohit,
Just some comments:
On 09/25/2017 05:02 PM, Rohit Jain wrote:
While looking for idle CPUs for a waking task, we should also account for the delays caused due to the bandwidth reduction by RT/IRQ tasks.
This patch does that by trying to find a higher capacity CPU with minimum wake up latency.
Signed-off-by: Rohit Jain rohit.k.jain@oracle.com
kernel/sched/fair.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index eca6a57..afb701f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5590,6 +5590,11 @@ static unsigned long capacity_orig_of(int cpu) return cpu_rq(cpu)->cpu_capacity_orig; }
+static inline bool full_capacity(int cpu) +{
+ return (capacity_of(cpu) >= (capacity_orig_of(cpu)*819 >> 10));
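Wouldn't 768 be a better multiplier here? Since 768 = 512 + 256, gcc can turn the multiplication into two shifts and an add. Note that this would make the threshold 75% of the original capacity instead of roughly 80% (819/1024).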
+}
static unsigned long cpu_avg_load_per_task(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -5916,8 +5921,10 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) unsigned long load, min_load = ULONG_MAX; unsigned int min_exit_latency = UINT_MAX; u64 latest_idle_timestamp = 0;
unsigned int backup_cap = 0; int least_loaded_cpu = this_cpu; int shallowest_idle_cpu = -1;
int shallowest_idle_cpu_backup = -1; int i;
/* Check if we have any choice: */
@@ -5937,7 +5944,12 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) */ min_exit_latency = idle->exit_latency; latest_idle_timestamp = rq->idle_stamp;
shallowest_idle_cpu = i;
if (full_capacity(i)) {
shallowest_idle_cpu = i;
} else if (capacity_of(i) > backup_cap) {
shallowest_idle_cpu_backup = i;
backup_cap = capacity_of(i);
}
I'm a bit skeptical about this - if the CPU is idle, is it likely that its capacity is reduced due to RT pressure? I can see that it can matter, but I am wondering if you have any data for your use case to show that it does. That is, if you didn't consider RT pressure for idle CPUs, would you still see a big enough performance improvement to warrant the change?
} else if ((!idle || idle->exit_latency == min_exit_latency) && rq->idle_stamp > latest_idle_timestamp) { /*
@@ -5946,7 +5958,12 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) * a warmer cache. */ latest_idle_timestamp = rq->idle_stamp;
shallowest_idle_cpu = i;
if (full_capacity(i)) {
shallowest_idle_cpu = i;
} else if (capacity_of(i) > backup_cap) {
shallowest_idle_cpu_backup = i;
backup_cap = capacity_of(i);
} else if (shallowest_idle_cpu == -1) { load = weighted_cpuload(cpu_rq(i));} }
@@ -5957,7 +5974,11 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) } }
- return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
+ if (shallowest_idle_cpu != -1)
+ return shallowest_idle_cpu;
+ return (shallowest_idle_cpu_backup != -1 ?
+ shallowest_idle_cpu_backup : least_loaded_cpu);
}
#ifdef CONFIG_SCHED_SMT
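I see code duplication here: the same capacity check is repeated in both idle branches. Folding it into a single check, as in the version below, makes the patch 7 lines shorter than your original: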
--- kernel/sched/fair.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c95880e216f6..72fc8d18b251 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5528,6 +5528,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) /* Traverse only the allowed CPUs */ for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) { if (idle_cpu(i)) { + int idle_candidate = -1; struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); if (idle && idle->exit_latency < min_exit_latency) { @@ -5538,7 +5539,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) */ min_exit_latency = idle->exit_latency; latest_idle_timestamp = rq->idle_stamp; - shallowest_idle_cpu = i; + idle_candidate = i; } else if ((!idle || idle->exit_latency == min_exit_latency) && rq->idle_stamp > latest_idle_timestamp) { /* @@ -5547,7 +5548,16 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) * a warmer cache. */ latest_idle_timestamp = rq->idle_stamp; - shallowest_idle_cpu = i; + idle_candidate = i; + } + + if (idle_candidate != -1) { + if (full_capacity(idle_candidate)) { + shallowest_idle_cpu = idle_candidate; + } else if (capacity_of(idle_candidate) > backup_cap) { + shallowest_idle_cpu_backup = idle_candidate; + backup_cap = capacity_of(idle_candidate); + } } } else if (shallowest_idle_cpu == -1) { load = weighted_cpuload(i); @@ -5558,7 +5568,11 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) } }
- return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; + if (shallowest_idle_cpu != -1) + return shallowest_idle_cpu; + + return (shallowest_idle_cpu_backup != -1 ? + shallowest_idle_cpu_backup : least_loaded_cpu); }
#ifdef CONFIG_SCHED_SMT -- 2.14.1.821.g8fa685d3b7-goog