When a normal forced up-migration takes place, we stop the task to be migrated while we wait for the target CPU to become available. This delay can range from 80us to 1500us on TC2 if the target CPU is in a deep idle state.
Instead, interrupt the target CPU and ask it to pull a task. This lets the current eligible task continue executing on the original CPU while the target CPU wakes. Use a pinned timer to prevent the pulling CPU from going back into power-down while up-migrations are pending.
If we trigger for a nohz kick, there is no need to also trigger for an idle pull: the wake_for_idle_pull flag will still be set when we execute the softirq, so we'll still do the idle pull.
If the target CPU is busy, we will not pull any tasks.
Signed-off-by: Chris Redpath <chris.redpath@arm.com> --- kernel/sched/core.c | 12 +++++- kernel/sched/fair.c | 100 ++++++++++++++++++++++++++++++++++++++++++++------ kernel/sched/sched.h | 1 + 3 files changed, 101 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index de9d360..2a74474 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1407,7 +1407,11 @@ void scheduler_ipi(void) { if (llist_empty(&this_rq()->wake_list) && !tick_nohz_full_cpu(smp_processor_id()) - && !got_nohz_idle_kick()) + && !got_nohz_idle_kick() +#ifdef CONFIG_SCHED_HMP + && !this_rq()->wake_for_idle_pull +#endif + ) return;
/* @@ -1434,6 +1438,12 @@ void scheduler_ipi(void) this_rq()->idle_balance = 1; raise_softirq_irqoff(SCHED_SOFTIRQ); } +#ifdef CONFIG_SCHED_HMP + else if (unlikely(this_rq()->wake_for_idle_pull)) { + raise_softirq_irqoff(SCHED_SOFTIRQ); + } +#endif + irq_exit(); }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4e3686b..b22906c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -39,6 +39,9 @@ */ #include <linux/cpufreq.h> #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ +#ifdef CONFIG_SCHED_HMP +#include <linux/cpuidle.h> +#endif
#include "sched.h"
@@ -3541,6 +3544,66 @@ static const int hmp_max_tasks = 5;
extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list);
+struct hmp_keepalive { + bool init; + ktime_t delay; + struct hrtimer timer; +}; +DEFINE_PER_CPU(struct hmp_keepalive, hmp_cpu_keepalive); + +/* setup per-cpu keepalive timers */ +static enum hrtimer_restart hmp_cpu_keepalive_notify(struct hrtimer *hrtimer) +{ + return HRTIMER_NORESTART; +} + +static void hmp_keepalive_delay(unsigned int *ns_delay) +{ + struct cpuidle_driver *drv; + drv = cpuidle_driver_ref(); + if (drv) { + unsigned int us_next = UINT_MAX; + unsigned int us_least = UINT_MAX; + unsigned int ns_next; + int idx; + for (idx = 0; idx < drv->state_count; idx++) { + if (drv->states[idx].target_residency < us_least) { + us_least = drv->states[idx].target_residency; + } + } + for (idx = 0; idx < drv->state_count; idx++) { + if (drv->states[idx].target_residency > us_least && + drv->states[idx].target_residency < us_next) { + us_next = drv->states[idx].target_residency; + } + } + if (us_next) + us_next>>=1; + ns_next = us_next << 10; + *ns_delay = *ns_delay < ns_next ? *ns_delay : ns_next; + printk("keepalive: use duration %u for cpu%d\n", *ns_delay, smp_processor_id()); + } + cpuidle_driver_unref(); +} + +static void hmp_cpu_keepalive_trigger(void) +{ + int cpu = smp_processor_id(); + struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu); + if (!keepalive->init) { + unsigned int ns_delay = 1000000; + + hrtimer_init(&keepalive->timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + keepalive->timer.function = hmp_cpu_keepalive_notify; + hmp_keepalive_delay(&ns_delay); + keepalive->delay = ns_to_ktime(ns_delay); + keepalive->init = true; + } + hrtimer_start(&keepalive->timer, + keepalive->delay, HRTIMER_MODE_REL_PINNED); +} + /* Setup hmp_domains */ static int __init hmp_cpu_mask_setup(void) { @@ -3598,9 +3661,13 @@ static void hmp_online_cpu(int cpu) static void hmp_offline_cpu(int cpu) { struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu); + struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
if(domain) cpumask_clear_cpu(cpu, &domain->cpus); + + if (keepalive->init) + hrtimer_cancel(&keepalive->timer); } /* * Needed to determine heaviest tasks etc. @@ -7022,7 +7089,7 @@ static void hmp_force_up_migration(int this_cpu) target = cpu_rq(cpu); raw_spin_lock_irqsave(&target->lock, flags); curr = target->cfs.curr; - if (!curr) { + if (!curr || target->active_balance) { raw_spin_unlock_irqrestore(&target->lock, flags); continue; } @@ -7039,16 +7106,13 @@ static void hmp_force_up_migration(int this_cpu) curr = hmp_get_heaviest_task(curr, 1); p = task_of(curr); if (hmp_up_migration(cpu, &target_cpu, curr)) { - if (!target->active_balance) { - get_task_struct(p); - target->push_cpu = target_cpu; - target->migrate_task = p; - got_target = 1; - trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE); - hmp_next_up_delay(&p->se, target->push_cpu); - } + cpu_rq(target_cpu)->wake_for_idle_pull = 1; + raw_spin_unlock_irqrestore(&target->lock, flags); + spin_unlock(&hmp_force_migration); + smp_send_reschedule(target_cpu); + return; } - if (!got_target && !target->active_balance) { + if (!got_target) { /* * For now we just check the currently running task. * Selecting the lightest task for offloading will @@ -7070,7 +7134,7 @@ static void hmp_force_up_migration(int this_cpu) * is not currently running move it, otherwise let the * CPU stopper take care of it. */ - if (got_target && !target->active_balance) { + if (got_target) { if (!task_running(target, p)) { trace_sched_hmp_migrate_force_running(p, 0); hmp_migrate_runnable_task(target); @@ -7176,6 +7240,8 @@ static unsigned int hmp_idle_pull(int this_cpu) raw_spin_unlock_irqrestore(&target->lock, flags);
if (force) { + /* start timer to keep us awake */ + hmp_cpu_keepalive_trigger(); stop_one_cpu_nowait(cpu_of(target), hmp_active_task_migration_cpu_stop, target, &target->active_balance_work); @@ -7199,6 +7265,18 @@ static void run_rebalance_domains(struct softirq_action *h) enum cpu_idle_type idle = this_rq->idle_balance ? CPU_IDLE : CPU_NOT_IDLE;
+#ifdef CONFIG_SCHED_HMP + /* shortcut for hmp idle pull wakeups */ + if (unlikely(this_rq->wake_for_idle_pull)) { + this_rq->wake_for_idle_pull = 0; + if (hmp_idle_pull(this_cpu)) { + /* break out unless running nohz idle as well */ + if (idle != CPU_IDLE) + return; + } + } +#endif + hmp_force_up_migration(this_cpu);
rebalance_domains(this_cpu, idle); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 989c5ae..0d19ede 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -466,6 +466,7 @@ struct rq { struct cpu_stop_work active_balance_work; #ifdef CONFIG_SCHED_HMP struct task_struct *migrate_task; + int wake_for_idle_pull; #endif /* cpu of this runqueue: */ int cpu;