Hi Vikram,
On Thu, Nov 17, 2016 at 03:03:02PM -0800, Vikram Mulukutla wrote:
On 11/16/2016 10:04 PM, Leo Yan wrote:
The function walt_fixup_busy_time() manipulates the source and destination runqueues at the same time, which inherently requires holding both rq locks for exclusive access. The two locks are acquired via double_lock_balance(), but that helper may first release the source rq lock and then acquire the two locks in a fixed order to avoid deadlock.
This gives another CPU a chance to acquire the source rq lock in the middle of double_lock_balance(), which makes the pattern below unsafe when a pointer fetched under the lock is dereferenced afterwards:
  raw_spin_lock(&rq->lock);
  node = get_one_pointer_from_rq();
  double_lock_balance();        -> breaks atomicity
  double_unlock_balance();
  use_pointer(node);            -> may cause an error or panic,
                                   because node has been modified
                                   by another CPU
  raw_spin_unlock(&rq->lock);
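For context, the deadlock-avoidance ordering that makes double_lock_balance() drop the lock looks roughly like the simplified sketch below (paraphrased for illustration, not verbatim kernel code; the helper name double_lock_balance_sketch is made up here):

  /*
   * Simplified sketch of the lock ordering described above: if the
   * second rq lock cannot be taken immediately and it sorts before
   * the lock we already hold, the held lock is dropped and both are
   * re-acquired in address order to avoid ABBA deadlock.  Returns 1
   * when the caller's lock was dropped, i.e. atomicity was broken.
   */
  static int double_lock_balance_sketch(struct rq *this_rq, struct rq *busiest)
  {
  	int dropped = 0;

  	if (!raw_spin_trylock(&busiest->lock)) {
  		if (busiest < this_rq) {
  			raw_spin_unlock(&this_rq->lock);  /* other CPUs can get in here */
  			raw_spin_lock(&busiest->lock);
  			raw_spin_lock_nested(&this_rq->lock,
  					     SINGLE_DEPTH_NESTING);
  			dropped = 1;
  		} else {
  			raw_spin_lock_nested(&busiest->lock,
  					     SINGLE_DEPTH_NESTING);
  		}
  	}
  	return dropped;
  }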
So this patch ports Vikram's latest WALT patches to AOSP-4.4, replacing walt_fixup_busy_time() with the pair walt_prepare_migrate() and walt_finish_migrate().
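The resulting calling pattern, as used in the move_queued_task() hunk below, is roughly the following (a usage sketch assembled from the diff, not code added by this patch; src_rq, dst_rq and new_cpu are placeholder names):

  /*
   * walt_prepare_migrate() runs with only the source rq lock held and
   * walt_finish_migrate() with only the destination rq lock held, so
   * double_lock_balance() is no longer needed across the migration.
   */
  raw_spin_lock(&src_rq->lock);
  walt_prepare_migrate(p, src_rq, true);
  set_task_cpu(p, new_cpu);
  raw_spin_unlock(&src_rq->lock);

  dst_rq = cpu_rq(new_cpu);
  raw_spin_lock(&dst_rq->lock);
  walt_finish_migrate(p, dst_rq, true);
  raw_spin_unlock(&dst_rq->lock);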
Thanks for doing this! I'm fine with the commit text, but please switch the subject line to just say 'don't use double lock balance'. We haven't broken anything in the baseline kernel; it's just that the new code will have a problem.
Sure, will do this.
Signed-off-by: Vikram Mulukutla <markivx@codeaurora.org>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
---
 kernel/sched/core.c     | 10 ++++++----
 kernel/sched/deadline.c |  8 ++++++++
 kernel/sched/fair.c     |  4 ++--
 kernel/sched/rt.c       |  4 ++++
 4 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 951a4e4..10f36e2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1086,15 +1086,15 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
 	dequeue_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_MIGRATING;
-	double_lock_balance(rq, cpu_rq(new_cpu));
+	walt_prepare_migrate(p, rq, true);
 	set_task_cpu(p, new_cpu);
-	double_unlock_balance(rq, cpu_rq(new_cpu));
 	raw_spin_unlock(&rq->lock);
 
 	rq = cpu_rq(new_cpu);
 
 	raw_spin_lock(&rq->lock);
 	BUG_ON(task_cpu(p) != new_cpu);
+	walt_finish_migrate(p, rq, true);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	enqueue_task(rq, p, 0);
 	check_preempt_curr(rq, p, 0);
@@ -1312,8 +1312,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		p->sched_class->migrate_task_rq(p);
 		p->se.nr_migrations++;
 		perf_event_task_migrate(p);
-
-		walt_fixup_busy_time(p, new_cpu);
 	}
 
 	__set_task_cpu(p, new_cpu);
@@ -1328,7 +1326,9 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
 		dst_rq = cpu_rq(cpu);
 
 		deactivate_task(src_rq, p, 0);
+		walt_prepare_migrate(p, src_rq, true);
 		set_task_cpu(p, cpu);
+		walt_finish_migrate(p, dst_rq, true);
 		activate_task(dst_rq, p, 0);
 		check_preempt_curr(dst_rq, p, 0);
 	} else {
@@ -2021,7 +2021,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	if (task_cpu(p) != cpu) {
 		wake_flags |= WF_MIGRATED;
+		walt_prepare_migrate(p, task_rq(p), false);
 		set_task_cpu(p, cpu);
+		walt_finish_migrate(p, cpu_rq(cpu), false);
 	}
 
 #endif /* CONFIG_SMP */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9d9eb50..ad29d29 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -18,6 +18,8 @@
 #include <linux/slab.h>
 
+#include "walt.h"
+
 struct dl_bandwidth def_dl_bandwidth;
 
 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
@@ -290,7 +292,9 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
 	 * By now the task is replenished and enqueued; migrate it.
 	 */
 	deactivate_task(rq, p, 0);
+	walt_prepare_migrate(p, rq, true);
 	set_task_cpu(p, later_rq->cpu);
+	walt_finish_migrate(p, later_rq, true);
 	activate_task(later_rq, p, 0);
 
 	if (!fallback)
@@ -1580,7 +1584,9 @@ retry:
 	deactivate_task(rq, next_task, 0);
 	clear_average_bw(&next_task->dl, &rq->dl);
+	walt_prepare_migrate(next_task, rq, true);
 	set_task_cpu(next_task, later_rq->cpu);
+	walt_finish_migrate(next_task, later_rq, true);
 	add_average_bw(&next_task->dl, &later_rq->dl);
 	activate_task(later_rq, next_task, 0);
 	ret = 1;
@@ -1670,7 +1676,9 @@ static void pull_dl_task(struct rq *this_rq)
 			deactivate_task(src_rq, p, 0);
 			clear_average_bw(&p->dl, &src_rq->dl);
+			walt_prepare_migrate(p, src_rq, true);
 			set_task_cpu(p, this_cpu);
+			walt_finish_migrate(p, this_rq, true);
 			add_average_bw(&p->dl, &this_rq->dl);
 			activate_task(this_rq, p, 0);
 			dmin = p->dl.deadline;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8d568b0..df9b531 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6726,9 +6726,8 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 	deactivate_task(env->src_rq, p, 0);
 	p->on_rq = TASK_ON_RQ_MIGRATING;
-	double_lock_balance(env->src_rq, env->dst_rq);
+	walt_prepare_migrate(p, env->src_rq, true);
 	set_task_cpu(p, env->dst_cpu);
-	double_unlock_balance(env->src_rq, env->dst_rq);
 }
 
 /*
@@ -6881,6 +6880,7 @@ static void attach_task(struct rq *rq, struct task_struct *p)
 	lockdep_assert_held(&rq->lock);
 	BUG_ON(task_rq(p) != rq);
+	walt_finish_migrate(p, rq, true);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	activate_task(rq, p, 0);
 	check_preempt_curr(rq, p, 0);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8a16cba..564b5fd 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1879,7 +1879,9 @@ retry:
 	}
 
 	deactivate_task(rq, next_task, 0);
+	walt_prepare_migrate(next_task, rq, true);
 	set_task_cpu(next_task, lowest_rq->cpu);
+	walt_finish_migrate(next_task, lowest_rq, true);
 	activate_task(lowest_rq, next_task, 0);
 	ret = 1;
@@ -2133,7 +2135,9 @@ static void pull_rt_task(struct rq *this_rq)
 			resched = true;
 
 			deactivate_task(src_rq, p, 0);
+			walt_prepare_migrate(p, src_rq, true);
 			set_task_cpu(p, this_cpu);
+			walt_finish_migrate(p, this_rq, true);
 			activate_task(this_rq, p, 0);
 
 			/*
 			 * We continue with the search, just in
Provided the subject line is fixed, ACK from me.
Thanks for the ACK :)
Thanks,
Vikram