These are the changes that adjust the scheduler's CPU-selection behavior based on the cpus_preferred mask: the idle-CPU search paths now scan the preferred CPUs first and fall back to the remaining allowed CPUs only when the two masks differ. Keep in mind that when the system call changes the cpus_allowed mask, cpus_preferred and cpus_allowed become the same.
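As a rough illustrative sketch (not part of the diff below), the fair.c paths all follow this two-level pattern; scan_for_idle(), two_level_idle_search() and the scratch mask here are hypothetical stand-ins for the scan_cpu_mask_for_idle_*() helpers this patch adds:

	/* Sketch only: search preferred CPUs first, then the rest of allowed. */
	static int two_level_idle_search(struct task_struct *p,
					 struct cpumask *scratch, int target)
	{
		int cpu;

		/* First pass: only the CPUs the task prefers. */
		cpu = scan_for_idle(&p->cpus_preferred, target);
		if (cpu >= 0)
			return cpu;

		/* No soft affinity set: the masks are equal, nothing left to scan. */
		if (cpumask_equal(&p->cpus_preferred, &p->cpus_allowed))
			return -1;

		/* Second pass: allowed-but-not-preferred CPUs only. */
		cpumask_andnot(scratch, &p->cpus_allowed, &p->cpus_preferred);
		return scan_for_idle(scratch, target);
	}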
Signed-off-by: Rohit Jain <rohit.k.jain@oracle.com>
---
 kernel/sched/cpudeadline.c |   4 +-
 kernel/sched/cpupri.c      |   4 +-
 kernel/sched/fair.c        | 116 +++++++++++++++++++++++++++++++++------------
 3 files changed, 91 insertions(+), 33 deletions(-)
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 8d9562d..32135b9 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -127,13 +127,13 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
         const struct sched_dl_entity *dl_se = &p->dl;
         if (later_mask &&
-            cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+            cpumask_and(later_mask, cp->free_cpus, &p->cpus_preferred)) {
                 return 1;
         } else {
                 int best_cpu = cpudl_maximum(cp);
                 WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
-                if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
+                if (cpumask_test_cpu(best_cpu, &p->cpus_preferred) &&
                     dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
                         if (later_mask)
                                 cpumask_set_cpu(best_cpu, later_mask);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 2511aba..9641b8d 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
                 if (skip)
                         continue;
-                if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+                if (cpumask_any_and(&p->cpus_preferred, vec->mask) >= nr_cpu_ids)
                         continue;
                 if (lowest_mask) {
-                        cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+                        cpumask_and(lowest_mask, &p->cpus_preferred, vec->mask);
                        /*
                         * We have to ensure that we have at least one bit
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eca6a57..35e73c7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5805,7 +5805,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                /* Skip over this group if it has no CPUs allowed */
                if (!cpumask_intersects(sched_group_span(group),
-                                       &p->cpus_allowed))
+                                       &p->cpus_preferred))
                        continue;
                local_group = cpumask_test_cpu(this_cpu,
@@ -5925,7 +5925,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
                return cpumask_first(sched_group_span(group));
        /* Traverse only the allowed CPUs */
-       for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
+       for_each_cpu_and(i, sched_group_span(group), &p->cpus_preferred) {
                if (idle_cpu(i)) {
                        struct rq *rq = cpu_rq(i);
                        struct cpuidle_state *idle = idle_get_state(rq);
@@ -6011,6 +6011,27 @@ void __update_idle_core(struct rq *rq)
        rcu_read_unlock();
 }
+static inline int
+scan_cpu_mask_for_idle_cores(struct cpumask *cpus, int target)
+{
+       int core, cpu;
+
+       for_each_cpu_wrap(core, cpus, target) {
+               bool idle = true;
+
+               for_each_cpu(cpu, cpu_smt_mask(core)) {
+                       cpumask_clear_cpu(cpu, cpus);
+                       if (!idle_cpu(cpu))
+                               idle = false;
+               }
+
+               if (idle)
+                       return core;
+       }
+
+       return -1;
+}
+
 /*
  * Scan the entire LLC domain for idle cores; this dynamically switches off if
  * there are no idle cores left in the system; tracked through
@@ -6019,7 +6040,8 @@ void __update_idle_core(struct rq *rq)
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-       int core, cpu;
+       struct cpumask *pcpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+       int core;
        if (!static_branch_likely(&sched_smt_present))
                return -1;
@@ -6028,20 +6050,21 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
                return -1;
        cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
+       cpumask_and(pcpus, cpus, &p->cpus_preferred);
+       core = scan_cpu_mask_for_idle_cores(pcpus, target);
-       for_each_cpu_wrap(core, cpus, target) {
-               bool idle = true;
+       if (core >= 0)
+               return core;
-               for_each_cpu(cpu, cpu_smt_mask(core)) {
-                       cpumask_clear_cpu(cpu, cpus);
-                       if (!idle_cpu(cpu))
-                               idle = false;
-               }
+       if (cpumask_equal(cpus, pcpus))
+               goto out;
-               if (idle)
-                       return core;
-       }
+       cpumask_andnot(cpus, cpus, pcpus);
+       core = scan_cpu_mask_for_idle_cores(cpus, target);
+       if (core >= 0)
+               return core;
+out:
        /*
         * Failed to find an idle core; stop looking for one.
         */
@@ -6050,24 +6073,40 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
        return -1;
 }
+static inline int
+scan_cpu_mask_for_idle_smt(struct cpumask *cpus, int target)
+{
+       int cpu;
+
+       for_each_cpu(cpu, cpu_smt_mask(target)) {
+               if (!cpumask_test_cpu(cpu, cpus))
+                       continue;
+               if (idle_cpu(cpu))
+                       return cpu;
+       }
+
+       return -1;
+}
+
 /*
  * Scan the local SMT mask for idle CPUs.
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
+       struct cpumask *cpus = &p->cpus_allowed;
        int cpu;
        if (!static_branch_likely(&sched_smt_present))
                return -1;
-       for_each_cpu(cpu, cpu_smt_mask(target)) {
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-                       continue;
-               if (idle_cpu(cpu))
-                       return cpu;
-       }
+       cpu = scan_cpu_mask_for_idle_smt(&p->cpus_preferred, target);
-       return -1;
+       if (cpu >= 0 || cpumask_equal(&p->cpus_preferred, cpus))
+               return cpu;
+
+       cpumask_andnot(cpus, cpus, &p->cpus_preferred);
+
+       return scan_cpu_mask_for_idle_smt(cpus, target);
 }
 #else /* CONFIG_SCHED_SMT */
@@ -6084,6 +6123,24 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
#endif /* CONFIG_SCHED_SMT */
+static inline int
+scan_cpu_mask_for_idle_cpu(struct cpumask *cpus, int target,
+                          struct sched_domain *sd, int nr)
+{
+       int cpu;
+
+       for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+               if (!--nr)
+                       return -1;
+               if (!cpumask_test_cpu(cpu, cpus))
+                       continue;
+               if (idle_cpu(cpu))
+                       break;
+       }
+
+       return cpu;
+}
+
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -6092,6 +6149,7 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
        struct sched_domain *this_sd;
+       struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
        u64 avg_cost, avg_idle;
        u64 time, cost;
        s64 delta;
@@ -6121,15 +6179,15 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
time = local_clock();
-       for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
-               if (!--nr)
-                       return -1;
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-                       continue;
-               if (idle_cpu(cpu))
-                       break;
-       }
+       cpu = scan_cpu_mask_for_idle_cpu(&p->cpus_preferred, target, sd, nr);
+
+       if (cpu >= 0 || cpumask_equal(&p->cpus_preferred, &p->cpus_allowed))
+               goto out;
+       cpumask_andnot(cpus, &p->cpus_allowed, &p->cpus_preferred);
+
+       cpu = scan_cpu_mask_for_idle_cpu(cpus, target, sd, nr);
+out:
        time = local_clock() - time;
        cost = this_sd->avg_scan_cost;
        delta = (s64)(time - cost) / 8;
@@ -6279,7 +6337,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
        if (sd_flag & SD_BALANCE_WAKE) {
                record_wakee(p);
                want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
-                             && cpumask_test_cpu(cpu, &p->cpus_allowed);
+                             && cpumask_test_cpu(cpu, &p->cpus_preferred);
        }
        rcu_read_lock();
--
2.7.4