While looking for CPUs on which to place running tasks, the scheduler completely ignores the capacity stolen away by RT tasks and IRQs on those CPUs.

Fix this in the idle-CPU search paths (select_idle_core(), select_idle_smt(), select_idle_cpu() and select_idle_sibling()): prefer an idle CPU that still has its full capacity, and otherwise fall back to the idle CPU with the highest remaining capacity.
Signed-off-by: Rohit Jain <rohit.k.jain@oracle.com>
---
 kernel/sched/fair.c | 54 +++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 11 deletions(-)
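Note: the hunks below use full_capacity() next to the existing capacity_of() helper, but full_capacity() itself is not defined in this patch; it is assumed to be introduced by an earlier patch in this series. A minimal sketch of the assumed semantics (the ~80% cutoff is purely illustrative, not the series' actual definition):

/*
 * Sketch only: a CPU is treated as having "full capacity" when the
 * capacity left over after RT/IRQ pressure is still close to its
 * original capacity. Reuses the existing capacity_of() and
 * capacity_orig_of() helpers from kernel/sched/fair.c.
 */
static inline bool full_capacity(int cpu)
{
        return capacity_of(cpu) >= ((capacity_orig_of(cpu) * 819) >> 10);
}

With that reading, each search path below prefers an idle CPU with full capacity and only falls back to the idle CPU with the largest remaining capacity when no such CPU exists.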
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index afb701f..19ff2c3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6040,7 +6040,10 @@ void __update_idle_core(struct rq *rq)
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
         struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-        int core, cpu;
+        int core, cpu, rcpu, rcpu_backup;
+        unsigned int backup_cap = 0;
+
+        rcpu = rcpu_backup = -1;
 
         if (!static_branch_likely(&sched_smt_present))
                 return -1;
@@ -6057,10 +6060,20 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
                         cpumask_clear_cpu(cpu, cpus);
                         if (!idle_cpu(cpu))
                                 idle = false;
+
+                        if (full_capacity(cpu)) {
+                                rcpu = cpu;
+                        } else if ((rcpu == -1) && (capacity_of(cpu) > backup_cap)) {
+                                backup_cap = capacity_of(cpu);
+                                rcpu_backup = cpu;
+                        }
                 }
 
-                if (idle)
-                        return core;
+                if (idle) {
+                        if (rcpu == -1)
+                                return (rcpu_backup != -1 ? rcpu_backup : core);
+                        return rcpu;
+                }
         }
 
         /*
@@ -6076,7 +6089,8 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
-        int cpu;
+        int cpu, backup_cpu = -1;
+        unsigned int backup_cap = 0;
 
         if (!static_branch_likely(&sched_smt_present))
                 return -1;
@@ -6084,11 +6098,17 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
         for_each_cpu(cpu, cpu_smt_mask(target)) {
                 if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
                         continue;
-                if (idle_cpu(cpu))
-                        return cpu;
+                if (idle_cpu(cpu)) {
+                        if (full_capacity(cpu))
+                                return cpu;
+                        if (capacity_of(cpu) > backup_cap) {
+                                backup_cap = capacity_of(cpu);
+                                backup_cpu = cpu;
+                        }
+                }
         }
 
-        return -1;
+        return backup_cpu;
 }
 
 #else /* CONFIG_SCHED_SMT */
 
@@ -6117,6 +6137,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
         u64 time, cost;
         s64 delta;
         int cpu, nr = INT_MAX;
+        int backup_cpu = -1;
+        unsigned int backup_cap = 0;
 
         this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
         if (!this_sd)
@@ -6147,10 +6169,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
                         return -1;
                 if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
                         continue;
-                if (idle_cpu(cpu))
-                        break;
+                if (idle_cpu(cpu)) {
+                        if (full_capacity(cpu)) {
+                                backup_cpu = -1;
+                                break;
+                        } else if (capacity_of(cpu) > backup_cap) {
+                                backup_cap = capacity_of(cpu);
+                                backup_cpu = cpu;
+                        }
+                }
         }
 
+        if (backup_cpu >= 0)
+                cpu = backup_cpu;
         time = local_clock() - time;
         cost = this_sd->avg_scan_cost;
         delta = (s64)(time - cost) / 8;
@@ -6167,13 +6198,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
         struct sched_domain *sd;
         int i;
 
-        if (idle_cpu(target))
+        if (idle_cpu(target) && full_capacity(target))
                 return target;
 
         /*
          * If the previous cpu is cache affine and idle, don't be stupid.
          */
-        if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+        if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)
+            && full_capacity(prev))
                 return prev;
 
         sd = rcu_dereference(per_cpu(sd_llc, target));
-- 
2.7.4