Maybe we can skip the local group: since it's a bottom-up search, we already know from the prior iteration that there's no idle cpu in the lower domain.
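Roughly like this (just an untested sketch of the idea, not the tested patch below; it walks the domains bottom-up with for_each_domain() instead of starting from sd_llc, and 'target' is the usual search hint):

	struct sched_domain *sd;
	struct sched_group *sg;
	int i;

	/* rq->sd is the lowest domain, ->parent walks upward */
	for_each_domain(target, sd) {
		/* stay within the last-level cache */
		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
			break;

		sg = sd->groups;
		do {
			/*
			 * Skip the local group: the prior, lower
			 * iteration already scanned its cpus and
			 * found no idle one.
			 */
			if (cpumask_test_cpu(target, sched_group_cpus(sg)))
				goto next;

			if (!cpumask_intersects(sched_group_cpus(sg),
						tsk_cpus_allowed(p)))
				goto next;

			for_each_cpu(i, sched_group_cpus(sg)) {
				if (!idle_cpu(i))
					goto next;
			}

			target = cpumask_first_and(sched_group_cpus(sg),
					tsk_cpus_allowed(p));
			goto done;
next:
			sg = sg->next;
		} while (sg != sd->groups);
	}
done:
	return target;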
I tried this change, but the results seem worse on my machines; I guess starting the idle-cpu search bottom-up is a bad idea. The following is the full version with the above change.
I also tried keeping the top-down search mode while returning any idle cpu instead of the first cpu of an idle group, but the result doesn't show any improvement on the hackbench benchmark either.
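That variant kept the top-down sd_llc walk but picked the first idle and allowed cpu directly, rather than requiring a fully idle group; roughly (again only a sketch of what was measured, not part of the patch below):

	sd = rcu_dereference(per_cpu(sd_llc, target));
	for_each_lower_domain(sd) {
		sg = sd->groups;
		do {
			if (!cpumask_intersects(sched_group_cpus(sg),
						tsk_cpus_allowed(p)))
				goto next;

			/*
			 * Return any idle cpu in the group rather than
			 * the first cpu of an all-idle group.
			 */
			for_each_cpu_and(i, sched_group_cpus(sg),
					tsk_cpus_allowed(p)) {
				if (idle_cpu(i)) {
					target = i;
					goto done;
				}
			}
next:
			sg = sg->next;
		} while (sg != sd->groups);
	}
done:
	return target;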
===
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5eea870..fb85094 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3169,6 +3169,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 		return 1;
 	}
 
+	/* bias toward prev cpu */
 	return 0;
 }
 
@@ -3252,7 +3253,8 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 /*
  * Try and locate an idle CPU in the sched_domain.
  */
-static int select_idle_sibling(struct task_struct *p, int target)
+static int select_idle_sibling(struct task_struct *p,
+				struct sched_domain *affine_sd, int sync)
 {
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
@@ -3264,20 +3266,23 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	 * If the task is going to be woken-up on this cpu and if it is
 	 * already idle, then it is the right target.
 	 */
-	if (target == cpu && idle_cpu(cpu))
+	if (idle_cpu(cpu))
 		return cpu;
 
 	/*
 	 * If the task is going to be woken-up on the cpu where it previously
 	 * ran and if it is currently idle, then it the right target.
 	 */
-	if (target == prev_cpu && idle_cpu(prev_cpu))
+	if (cpu != prev_cpu && idle_cpu(prev_cpu))
 		return prev_cpu;
 
+	if (cpu != prev_cpu && !wake_affine(affine_sd, p, sync))
+		cpu = prev_cpu;
+
 	/*
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
-	sd = rcu_dereference(per_cpu(sd_llc, target));
+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
 	for_each_lower_domain(sd) {
 		sg = sd->groups;
 		do {
@@ -3290,7 +3295,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 					goto next;
 			}
 
-			target = cpumask_first_and(sched_group_cpus(sg),
+			cpu = cpumask_first_and(sched_group_cpus(sg),
 					tsk_cpus_allowed(p));
 			goto done;
 next:
@@ -3298,7 +3303,7 @@ next:
 		} while (sg != sd->groups);
 	}
 done:
-	return target;
+	return cpu;
 }
 
 /*
@@ -3351,10 +3356,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 	}
 
 	if (affine_sd) {
-		if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
-			prev_cpu = cpu;
-
-		new_cpu = select_idle_sibling(p, prev_cpu);
+		new_cpu = select_idle_sibling(p, affine_sd, sync);
 		goto unlock;
 	}