These are the changes that adjust the scheduler's CPU-selection behavior based on the cpus_preferred mask: the idle-CPU search paths now scan the preferred CPUs first and fall back to the remaining allowed CPUs only when the two masks differ. Keep in mind that when the system call changes the cpus_allowed mask, cpus_preferred and cpus_allowed become the same.
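As a rough illustrative sketch (not part of the diff below), the fair.c paths all follow this two-level pattern; scan_for_idle(), two_level_idle_search() and the scratch mask here are hypothetical stand-ins for the scan_cpu_mask_for_idle_*() helpers this patch adds:

	/* Sketch only: search preferred CPUs first, then the rest of allowed. */
	static int two_level_idle_search(struct task_struct *p,
					 struct cpumask *scratch, int target)
	{
		int cpu;

		/* First pass: only the CPUs the task prefers. */
		cpu = scan_for_idle(&p->cpus_preferred, target);
		if (cpu >= 0)
			return cpu;

		/* No soft affinity set: the masks are equal, nothing left to scan. */
		if (cpumask_equal(&p->cpus_preferred, &p->cpus_allowed))
			return -1;

		/* Second pass: allowed-but-not-preferred CPUs only. */
		cpumask_andnot(scratch, &p->cpus_allowed, &p->cpus_preferred);
		return scan_for_idle(scratch, target);
	}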
Signed-off-by: Rohit Jain <rohit.k.jain@oracle.com>
---
 kernel/sched/cpudeadline.c |   4 +-
 kernel/sched/cpupri.c      |   4 +-
 kernel/sched/fair.c        | 116 +++++++++++++++++++++++++++++++++------------
 3 files changed, 91 insertions(+), 33 deletions(-)
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 8d9562d..32135b9 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -127,13 +127,13 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
         const struct sched_dl_entity *dl_se = &p->dl;
         if (later_mask &&
-            cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+            cpumask_and(later_mask, cp->free_cpus, &p->cpus_preferred)) {
                 return 1;
         } else {
                 int best_cpu = cpudl_maximum(cp);
                 WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
-                if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
+                if (cpumask_test_cpu(best_cpu, &p->cpus_preferred) &&
                     dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
                         if (later_mask)
                                 cpumask_set_cpu(best_cpu, later_mask);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 2511aba..9641b8d 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
                 if (skip)
                         continue;
-                if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+                if (cpumask_any_and(&p->cpus_preferred, vec->mask) >= nr_cpu_ids)
                         continue;
                 if (lowest_mask) {
-                        cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+                        cpumask_and(lowest_mask, &p->cpus_preferred, vec->mask);
                        /*
                         * We have to ensure that we have at least one bit
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eca6a57..35e73c7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5805,7 +5805,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                /* Skip over this group if it has no CPUs allowed */
                if (!cpumask_intersects(sched_group_span(group),
-                                       &p->cpus_allowed))
+                                       &p->cpus_preferred))
                        continue;
                local_group = cpumask_test_cpu(this_cpu,
@@ -5925,7 +5925,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
                return cpumask_first(sched_group_span(group));
        /* Traverse only the allowed CPUs */
-       for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
+       for_each_cpu_and(i, sched_group_span(group), &p->cpus_preferred) {
                if (idle_cpu(i)) {
                        struct rq *rq = cpu_rq(i);
                        struct cpuidle_state *idle = idle_get_state(rq);
@@ -6011,6 +6011,27 @@ void __update_idle_core(struct rq *rq)
        rcu_read_unlock();
 }
+static inline int
+scan_cpu_mask_for_idle_cores(struct cpumask *cpus, int target)
+{
+       int core, cpu;
+
+       for_each_cpu_wrap(core, cpus, target) {
+               bool idle = true;
+
+               for_each_cpu(cpu, cpu_smt_mask(core)) {
+                       cpumask_clear_cpu(cpu, cpus);
+                       if (!idle_cpu(cpu))
+                               idle = false;
+               }
+
+               if (idle)
+                       return core;
+       }
+
+       return -1;
+}
+
 /*
  * Scan the entire LLC domain for idle cores; this dynamically switches off if
  * there are no idle cores left in the system; tracked through
@@ -6019,7 +6040,8 @@ void __update_idle_core(struct rq *rq)
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-       int core, cpu;
+       struct cpumask *pcpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+       int core;
        if (!static_branch_likely(&sched_smt_present))
                return -1;
@@ -6028,20 +6050,21 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
                return -1;
        cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
+       cpumask_and(pcpus, cpus, &p->cpus_preferred);
+       core = scan_cpu_mask_for_idle_cores(pcpus, target);
-       for_each_cpu_wrap(core, cpus, target) {
-               bool idle = true;
+       if (core >= 0)
+               return core;
-               for_each_cpu(cpu, cpu_smt_mask(core)) {
-                       cpumask_clear_cpu(cpu, cpus);
-                       if (!idle_cpu(cpu))
-                               idle = false;
-               }
+       if (cpumask_equal(cpus, pcpus))
+               goto out;
-               if (idle)
-                       return core;
-       }
+       cpumask_andnot(cpus, cpus, pcpus);
+       core = scan_cpu_mask_for_idle_cores(cpus, target);
+       if (core >= 0)
+               return core;
+out:
        /*
         * Failed to find an idle core; stop looking for one.
         */
@@ -6050,24 +6073,40 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
        return -1;
 }
+static inline int
+scan_cpu_mask_for_idle_smt(struct cpumask *cpus, int target)
+{
+       int cpu;
+
+       for_each_cpu(cpu, cpu_smt_mask(target)) {
+               if (!cpumask_test_cpu(cpu, cpus))
+                       continue;
+               if (idle_cpu(cpu))
+                       return cpu;
+       }
+
+       return -1;
+}
+
 /*
  * Scan the local SMT mask for idle CPUs.
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
+       struct cpumask *cpus = &p->cpus_allowed;
        int cpu;
        if (!static_branch_likely(&sched_smt_present))
                return -1;
-       for_each_cpu(cpu, cpu_smt_mask(target)) {
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-                       continue;
-               if (idle_cpu(cpu))
-                       return cpu;
-       }
+       cpu = scan_cpu_mask_for_idle_smt(&p->cpus_preferred, target);
-       return -1;
+       if (cpu >= 0 || cpumask_equal(&p->cpus_preferred, cpus))
+               return cpu;
+
+       cpumask_andnot(cpus, cpus, &p->cpus_preferred);
+
+       return scan_cpu_mask_for_idle_smt(cpus, target);
 }
 #else /* CONFIG_SCHED_SMT */
@@ -6084,6 +6123,24 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
#endif /* CONFIG_SCHED_SMT */
+static inline int
+scan_cpu_mask_for_idle_cpu(struct cpumask *cpus, int target,
+                          struct sched_domain *sd, int nr)
+{
+       int cpu;
+
+       for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+               if (!--nr)
+                       return -1;
+               if (!cpumask_test_cpu(cpu, cpus))
+                       continue;
+               if (idle_cpu(cpu))
+                       break;
+       }
+
+       return cpu;
+}
+
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -6092,6 +6149,7 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
        struct sched_domain *this_sd;
+       struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
        u64 avg_cost, avg_idle;
        u64 time, cost;
        s64 delta;
@@ -6121,15 +6179,15 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
time = local_clock();
-       for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
-               if (!--nr)
-                       return -1;
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-                       continue;
-               if (idle_cpu(cpu))
-                       break;
-       }
+       cpu = scan_cpu_mask_for_idle_cpu(&p->cpus_preferred, target, sd, nr);
+
+       if (cpu >= 0 || cpumask_equal(&p->cpus_preferred, &p->cpus_allowed))
+               goto out;
+       cpumask_andnot(cpus, &p->cpus_allowed, &p->cpus_preferred);
+
+       cpu = scan_cpu_mask_for_idle_cpu(cpus, target, sd, nr);
+out:
        time = local_clock() - time;
        cost = this_sd->avg_scan_cost;
        delta = (s64)(time - cost) / 8;
@@ -6279,7 +6337,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
        if (sd_flag & SD_BALANCE_WAKE) {
                record_wakee(p);
                want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
-                             && cpumask_test_cpu(cpu, &p->cpus_allowed);
+                             && cpumask_test_cpu(cpu, &p->cpus_preferred);
        }
        rcu_read_lock();
--
2.7.4