Based on the rq's rb tree, it is possible to directly select the biggest task for migration. This patch changes the code to use the rb tree to iterate over the runnable tasks, from the biggest task down to the smallest one, checking whether each task meets the migration criteria. As a result, load balance gets more chances to migrate big tasks.
The rb tree is used in two functions: detach_tasks(), which balances load between the busiest group and the local group, and detach_one_task(), which is used for active load balance.
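For reference, the lookup in detach_one_task() boils down to the pattern sketched below. This is only an illustration; it assumes the seq_node rb tree (introduced earlier in this series) keeps entities ordered so that rb_first() returns the biggest runnable task:

	struct rb_node *node;
	struct sched_entity *se;
	struct task_struct *p = NULL;

	/* walk from the biggest task towards the smallest one */
	for (node = rb_first(&env->src_rq->seq_node); node; node = rb_next(node)) {
		se = rb_entry(node, struct sched_entity, seq_node);
		if (can_migrate_task(task_of(se), env)) {
			/* first hit is the biggest migratable task */
			p = task_of(se);
			break;
		}
	}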
Signed-off-by: Leo Yan <leo.yan@linaro.org>
---
 kernel/sched/fair.c | 55 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 17 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f2148d9..0c68c80 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6847,25 +6847,40 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 static struct task_struct *detach_one_task(struct lb_env *env)
 {
 	struct task_struct *p, *n;
+	struct rb_node *node;
+	struct sched_entity *se;
 
 	lockdep_assert_held(&env->src_rq->lock);
 
-	list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
-		if (!can_migrate_task(p, env))
-			continue;
+	p = NULL;
 
-		detach_task(p, env);
+	node = rb_first(&env->src_rq->seq_node);
+	while (node) {
 
-		/*
-		 * Right now, this is only the second place where
-		 * lb_gained[env->idle] is updated (other is detach_tasks)
-		 * so we can safely collect stats here rather than
-		 * inside detach_tasks().
-		 */
-		schedstat_inc(env->sd, lb_gained[env->idle]);
-		return p;
+		se = rb_entry(node, struct sched_entity, seq_node);
+		n = task_of(se);
+
+		if (can_migrate_task(n, env)) {
+			p = n;
+			break;
+		}
+
+		node = rb_next(&se->seq_node);
 	}
-	return NULL;
+
+	if (!p)
+		return NULL;
+
+	detach_task(p, env);
+
+	/*
+	 * Right now, this is only the second place where
+	 * lb_gained[env->idle] is updated (other is detach_tasks)
+	 * so we can safely collect stats here rather than
+	 * inside detach_tasks().
+	 */
+	schedstat_inc(env->sd, lb_gained[env->idle]);
+	return p;
 }
 
 static const unsigned int sched_nr_migrate_break = 32;
@@ -6878,17 +6893,22 @@ static const unsigned int sched_nr_migrate_break = 32;
  */
 static int detach_tasks(struct lb_env *env)
 {
-	struct list_head *tasks = &env->src_rq->cfs_tasks;
 	struct task_struct *p;
 	unsigned long load;
 	int detached = 0;
+	struct sched_entity *se;
+	struct rb_node *node;
 
 	lockdep_assert_held(&env->src_rq->lock);
 
 	if (env->imbalance <= 0)
 		return 0;
 
-	while (!list_empty(tasks)) {
+	node = rb_first(&env->src_rq->seq_node);
+	while (node) {
+
+		se = rb_entry(node, struct sched_entity, seq_node);
+
 		/*
 		 * We don't want to steal all, otherwise we may be treated likewise,
 		 * which could at worst lead to a livelock crash.
@@ -6896,7 +6916,7 @@ static int detach_tasks(struct lb_env *env)
 		if (env->idle != CPU_NOT_IDLE && env->src_rq->nr_running <= 1)
 			break;
 
-		p = list_first_entry(tasks, struct task_struct, se.group_node);
+		p = task_of(se);
 
 		env->loop++;
 		/* We've more or less seen every task there is, call it quits */
@@ -6944,9 +6964,10 @@ static int detach_tasks(struct lb_env *env)
 		if (env->imbalance <= 0)
 			break;
 
+		node = rb_first(&env->src_rq->seq_node);
 		continue;
next:
-		list_move_tail(&p->se.group_node, tasks);
+		node = rb_next(&se->seq_node);
 	}
 
 	/*
--
1.9.1