Hi Tixy,
Here's the set all squashed together where it should be.
--Chris
[PATCH 1/4] hmp: sched: Clean up hmp_up_threshold checks into a
[PATCH 2/4] sched: hmp: unify active migration code
[PATCH 3/4] hmp: Use idle pull to perform forced up-migrations
[PATCH 4/4] hmp: dont attempt to pull tasks if affinity doesn't
In anticipation of modifying the up_threshold handling, make all instances use the same utility function to check whether a task is eligible for up-migration. This also removes the previous difference in threshold comparison, where up-migration used '!<threshold' and idle pull used '>threshold' to decide eligibility. Make them both use '!<threshold' for consistency, although this is unlikely to change any results.
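For clarity, the only value where '>threshold' and '!<threshold' disagree is a load exactly equal to the threshold, which the old idle-pull test rejected and the unified test accepts. A minimal standalone illustration (threshold value invented, not the real tunable):

#include <stdio.h>

int main(void)
{
	unsigned long threshold = 512;	/* stand-in for hmp_up_threshold */
	unsigned long load = 512;	/* task load exactly at the threshold */

	/* old idle-pull eligibility test: strictly greater than */
	int old_check = (load > threshold);	/* 0 -> not eligible */
	/* unified test used by the new helper: not less than */
	int new_check = !(load < threshold);	/* 1 -> eligible */

	printf("old=%d unified=%d\n", old_check, new_check);
	return 0;
}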
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
---
 kernel/sched/fair.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 62a8808..29e2c74 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6734,6 +6734,14 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { } #endif
#ifdef CONFIG_SCHED_HMP +static unsigned int hmp_task_eligible_for_up_migration(struct sched_entity *se) +{ + /* below hmp_up_threshold, never eligible */ + if (se->avg.load_avg_ratio < hmp_up_threshold) + return 0; + return 1; +} + /* Check if task should migrate to a faster cpu */ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se) { @@ -6749,7 +6757,7 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti if (p->prio >= hmp_up_prio) return 0; #endif - if (se->avg.load_avg_ratio < hmp_up_threshold) + if (!hmp_task_eligible_for_up_migration(se)) return 0;
/* Let the task load settle before doing another up migration */ @@ -7237,7 +7245,10 @@ static unsigned int hmp_idle_pull(int this_cpu) } orig = curr; curr = hmp_get_heaviest_task(curr, 1); - if (curr->avg.load_avg_ratio > hmp_up_threshold && + /* check if heaviest eligible task on this + * CPU is heavier than previous task + */ + if (hmp_task_eligible_for_up_migration(curr) && curr->avg.load_avg_ratio > ratio) { p = task_of(curr); target = rq;
The HMP active migration code is functionally identical to the CFS active migration code apart from one flag check. Share the code and make the flag check optional.
Two wrapper functions allow the flag check to be present or not.
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
---
 kernel/sched/fair.c | 188 +++++++++++----------------------------------------
 1 file changed, 40 insertions(+), 148 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 29e2c74..4e3686b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6214,6 +6214,7 @@ out: #ifdef CONFIG_SCHED_HMP static unsigned int hmp_idle_pull(int this_cpu); #endif +static int move_specific_task(struct lb_env *env, struct task_struct *pm); /* * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. @@ -6273,21 +6274,17 @@ void idle_balance(int this_cpu, struct rq *this_rq) } }
-/* - * active_load_balance_cpu_stop is run by cpu stopper. It pushes - * running tasks off the busiest CPU onto idle CPUs. It requires at - * least 1 task to be running on each physical CPU where possible, and - * avoids physical / logical imbalances. - */ -static int active_load_balance_cpu_stop(void *data) +static int __do_active_load_balance_cpu_stop(void *data, bool check_sd_lb_flag) { struct rq *busiest_rq = data; int busiest_cpu = cpu_of(busiest_rq); int target_cpu = busiest_rq->push_cpu; struct rq *target_rq = cpu_rq(target_cpu); struct sched_domain *sd; + struct task_struct *p;
raw_spin_lock_irq(&busiest_rq->lock); + p = busiest_rq->migrate_task;
/* make sure the requested cpu hasn't gone down in the meantime */ if (unlikely(busiest_cpu != smp_processor_id() || @@ -6298,6 +6295,11 @@ static int active_load_balance_cpu_stop(void *data) if (busiest_rq->nr_running <= 1) goto out_unlock;
+ if (!check_sd_lb_flag) { + /* Task has migrated meanwhile, abort forced migration */ + if (task_rq(p) != busiest_rq) + goto out_unlock; + } /* * This condition is "impossible", if it occurs * we need to fix it. Originally reported by @@ -6311,12 +6313,14 @@ static int active_load_balance_cpu_stop(void *data) /* Search for an sd spanning us and the target CPU. */ rcu_read_lock(); for_each_domain(target_cpu, sd) { - if ((sd->flags & SD_LOAD_BALANCE) && - cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) + if (((check_sd_lb_flag && sd->flags & SD_LOAD_BALANCE) || + !check_sd_lb_flag) && + cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) break; }
if (likely(sd)) { + bool success = false; struct lb_env env = { .sd = sd, .dst_cpu = target_cpu, @@ -6328,7 +6332,14 @@ static int active_load_balance_cpu_stop(void *data)
schedstat_inc(sd, alb_count);
- if (move_one_task(&env)) + if (check_sd_lb_flag) { + if (move_one_task(&env)) + success = true; + } else { + if (move_specific_task(&env, p)) + success = true; + } + if (success) schedstat_inc(sd, alb_pushed); else schedstat_inc(sd, alb_failed); @@ -6336,11 +6347,24 @@ static int active_load_balance_cpu_stop(void *data) rcu_read_unlock(); double_unlock_balance(busiest_rq, target_rq); out_unlock: + if (!check_sd_lb_flag) + put_task_struct(p); busiest_rq->active_balance = 0; raw_spin_unlock_irq(&busiest_rq->lock); return 0; }
+/* + * active_load_balance_cpu_stop is run by cpu stopper. It pushes + * running tasks off the busiest CPU onto idle CPUs. It requires at + * least 1 task to be running on each physical CPU where possible, and + * avoids physical / logical imbalances. + */ +static int active_load_balance_cpu_stop(void *data) +{ + return __do_active_load_balance_cpu_stop(data, true); +} + #ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details @@ -6901,151 +6925,19 @@ static int move_specific_task(struct lb_env *env, struct task_struct *pm) * hmp_active_task_migration_cpu_stop is run by cpu stopper and used to * migrate a specific task from one runqueue to another. * hmp_force_up_migration uses this to push a currently running task - * off a runqueue. - * Based on active_load_balance_stop_cpu and can potentially be merged. + * off a runqueue. hmp_idle_pull uses this to pull a currently + * running task to an idle runqueue. + * Reuses __do_active_load_balance_cpu_stop to actually do the work. */ static int hmp_active_task_migration_cpu_stop(void *data) { - struct rq *busiest_rq = data; - struct task_struct *p = busiest_rq->migrate_task; - int busiest_cpu = cpu_of(busiest_rq); - int target_cpu = busiest_rq->push_cpu; - struct rq *target_rq = cpu_rq(target_cpu); - struct sched_domain *sd; - - raw_spin_lock_irq(&busiest_rq->lock); - /* make sure the requested cpu hasn't gone down in the meantime */ - if (unlikely(busiest_cpu != smp_processor_id() || - !busiest_rq->active_balance)) { - goto out_unlock; - } - /* Is there any task to move? */ - if (busiest_rq->nr_running <= 1) - goto out_unlock; - /* Task has migrated meanwhile, abort forced migration */ - if (task_rq(p) != busiest_rq) - goto out_unlock; - /* - * This condition is "impossible", if it occurs - * we need to fix it. Originally reported by - * Bjorn Helgaas on a 128-cpu setup. - */ - BUG_ON(busiest_rq == target_rq); - - /* move a task from busiest_rq to target_rq */ - double_lock_balance(busiest_rq, target_rq); - - /* Search for an sd spanning us and the target CPU. */ - rcu_read_lock(); - for_each_domain(target_cpu, sd) { - if (cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) - break; - } - - if (likely(sd)) { - struct lb_env env = { - .sd = sd, - .dst_cpu = target_cpu, - .dst_rq = target_rq, - .src_cpu = busiest_rq->cpu, - .src_rq = busiest_rq, - .idle = CPU_IDLE, - }; - - schedstat_inc(sd, alb_count); - - if (move_specific_task(&env, p)) - schedstat_inc(sd, alb_pushed); - else - schedstat_inc(sd, alb_failed); - } - rcu_read_unlock(); - double_unlock_balance(busiest_rq, target_rq); -out_unlock: - put_task_struct(p); - busiest_rq->active_balance = 0; - raw_spin_unlock_irq(&busiest_rq->lock); - return 0; -} - -/* - * hmp_idle_pull_cpu_stop is run by cpu stopper and used to - * migrate a specific task from one runqueue to another. - * hmp_idle_pull uses this to push a currently running task - * off a runqueue to a faster CPU. - * Locking is slightly different than usual. - * Based on active_load_balance_stop_cpu and can potentially be merged. 
- */ -static int hmp_idle_pull_cpu_stop(void *data) -{ - struct rq *busiest_rq = data; - struct task_struct *p = busiest_rq->migrate_task; - int busiest_cpu = cpu_of(busiest_rq); - int target_cpu = busiest_rq->push_cpu; - struct rq *target_rq = cpu_rq(target_cpu); - struct sched_domain *sd; - - raw_spin_lock_irq(&busiest_rq->lock); - - /* make sure the requested cpu hasn't gone down in the meantime */ - if (unlikely(busiest_cpu != smp_processor_id() || - !busiest_rq->active_balance)) - goto out_unlock; - - /* Is there any task to move? */ - if (busiest_rq->nr_running <= 1) - goto out_unlock; - - /* Task has migrated meanwhile, abort forced migration */ - if (task_rq(p) != busiest_rq) - goto out_unlock; - - /* - * This condition is "impossible", if it occurs - * we need to fix it. Originally reported by - * Bjorn Helgaas on a 128-cpu setup. - */ - BUG_ON(busiest_rq == target_rq); - - /* move a task from busiest_rq to target_rq */ - double_lock_balance(busiest_rq, target_rq); - - /* Search for an sd spanning us and the target CPU. */ - rcu_read_lock(); - for_each_domain(target_cpu, sd) { - if (cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) - break; - } - if (likely(sd)) { - struct lb_env env = { - .sd = sd, - .dst_cpu = target_cpu, - .dst_rq = target_rq, - .src_cpu = busiest_rq->cpu, - .src_rq = busiest_rq, - .idle = CPU_IDLE, - }; - - schedstat_inc(sd, alb_count); - - if (move_specific_task(&env, p)) - schedstat_inc(sd, alb_pushed); - else - schedstat_inc(sd, alb_failed); - } - rcu_read_unlock(); - double_unlock_balance(busiest_rq, target_rq); -out_unlock: - put_task_struct(p); - busiest_rq->active_balance = 0; - raw_spin_unlock_irq(&busiest_rq->lock); - return 0; + return __do_active_load_balance_cpu_stop(data, false); }
/* * Move task in a runnable state to another CPU. * - * Tailored on 'active_load_balance_stop_cpu' with slight + * Tailored on 'active_load_balance_cpu_stop' with slight * modification to locking and pre-transfer checks. Note * rq->lock must be held before calling. */ @@ -7285,7 +7177,7 @@ static unsigned int hmp_idle_pull(int this_cpu)
if (force) { stop_one_cpu_nowait(cpu_of(target), - hmp_idle_pull_cpu_stop, + hmp_active_task_migration_cpu_stop, target, &target->active_balance_work); } done:
When a normal forced up-migration takes place we stop the task to be migrated while the target CPU becomes available. This delay can range from 80us to 1500us on TC2 if the target CPU is in a deep idle state.
Instead, interrupt the target CPU and ask it to pull a task. This lets the current eligible task continue executing on the original CPU while the target CPU wakes. Use a pinned timer to prevent the pulling CPU going back into power-down with pending up-migrations.
If we trigger for a nohz kick, it does not matter that we also triggered for an idle pull: the idle_pull flag will still be set when we execute the softirq, so we will still do the idle pull.
If the target CPU is busy, we will not pull any tasks.
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
---
 kernel/sched/core.c  |  12 +++++-
 kernel/sched/fair.c  | 100 ++++++++++++++++++++++++++++++++++++++++++++------
 kernel/sched/sched.h |   1 +
 3 files changed, 101 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index de9d360..2a74474 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1407,7 +1407,11 @@ void scheduler_ipi(void) { if (llist_empty(&this_rq()->wake_list) && !tick_nohz_full_cpu(smp_processor_id()) - && !got_nohz_idle_kick()) + && !got_nohz_idle_kick() +#ifdef CONFIG_SCHED_HMP + && !this_rq()->wake_for_idle_pull +#endif + ) return;
/* @@ -1434,6 +1438,12 @@ void scheduler_ipi(void) this_rq()->idle_balance = 1; raise_softirq_irqoff(SCHED_SOFTIRQ); } +#ifdef CONFIG_SCHED_HMP + else if (unlikely(this_rq()->wake_for_idle_pull)) { + raise_softirq_irqoff(SCHED_SOFTIRQ); + } +#endif + irq_exit(); }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4e3686b..b22906c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -39,6 +39,9 @@ */ #include <linux/cpufreq.h> #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ +#ifdef CONFIG_SCHED_HMP +#include <linux/cpuidle.h> +#endif
#include "sched.h"
@@ -3541,6 +3544,66 @@ static const int hmp_max_tasks = 5;
extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list);
+struct hmp_keepalive { + bool init; + ktime_t delay; + struct hrtimer timer; +}; +DEFINE_PER_CPU(struct hmp_keepalive, hmp_cpu_keepalive); + +/* setup per-cpu keepalive timers */ +static enum hrtimer_restart hmp_cpu_keepalive_notify(struct hrtimer *hrtimer) +{ + return HRTIMER_NORESTART; +} + +static void hmp_keepalive_delay(unsigned int *ns_delay) +{ + struct cpuidle_driver *drv; + drv = cpuidle_driver_ref(); + if (drv) { + unsigned int us_next = UINT_MAX; + unsigned int us_least = UINT_MAX; + unsigned int ns_next; + int idx; + for (idx = 0; idx < drv->state_count; idx++) { + if (drv->states[idx].target_residency < us_least) { + us_least = drv->states[idx].target_residency; + } + } + for (idx = 0; idx < drv->state_count; idx++) { + if (drv->states[idx].target_residency > us_least && + drv->states[idx].target_residency < us_next) { + us_next = drv->states[idx].target_residency; + } + } + if (us_next) + us_next>>=1; + ns_next = us_next << 10; + *ns_delay = *ns_delay < ns_next ? *ns_delay : ns_next; + printk("keepalive: use duration %u for cpu%d\n", *ns_delay, smp_processor_id()); + } + cpuidle_driver_unref(); +} + +static void hmp_cpu_keepalive_trigger(void) +{ + int cpu = smp_processor_id(); + struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu); + if (!keepalive->init) { + unsigned int ns_delay = 1000000; + + hrtimer_init(&keepalive->timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + keepalive->timer.function = hmp_cpu_keepalive_notify; + hmp_keepalive_delay(&ns_delay); + keepalive->delay = ns_to_ktime(ns_delay); + keepalive->init = true; + } + hrtimer_start(&keepalive->timer, + keepalive->delay, HRTIMER_MODE_REL_PINNED); +} + /* Setup hmp_domains */ static int __init hmp_cpu_mask_setup(void) { @@ -3598,9 +3661,13 @@ static void hmp_online_cpu(int cpu) static void hmp_offline_cpu(int cpu) { struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu); + struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
if(domain) cpumask_clear_cpu(cpu, &domain->cpus); + + if (keepalive->init) + hrtimer_cancel(&keepalive->timer); } /* * Needed to determine heaviest tasks etc. @@ -7022,7 +7089,7 @@ static void hmp_force_up_migration(int this_cpu) target = cpu_rq(cpu); raw_spin_lock_irqsave(&target->lock, flags); curr = target->cfs.curr; - if (!curr) { + if (!curr || target->active_balance) { raw_spin_unlock_irqrestore(&target->lock, flags); continue; } @@ -7039,16 +7106,13 @@ static void hmp_force_up_migration(int this_cpu) curr = hmp_get_heaviest_task(curr, 1); p = task_of(curr); if (hmp_up_migration(cpu, &target_cpu, curr)) { - if (!target->active_balance) { - get_task_struct(p); - target->push_cpu = target_cpu; - target->migrate_task = p; - got_target = 1; - trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE); - hmp_next_up_delay(&p->se, target->push_cpu); - } + cpu_rq(target_cpu)->wake_for_idle_pull = 1; + raw_spin_unlock_irqrestore(&target->lock, flags); + spin_unlock(&hmp_force_migration); + smp_send_reschedule(target_cpu); + return; } - if (!got_target && !target->active_balance) { + if (!got_target) { /* * For now we just check the currently running task. * Selecting the lightest task for offloading will @@ -7070,7 +7134,7 @@ static void hmp_force_up_migration(int this_cpu) * is not currently running move it, otherwise let the * CPU stopper take care of it. */ - if (got_target && !target->active_balance) { + if (got_target) { if (!task_running(target, p)) { trace_sched_hmp_migrate_force_running(p, 0); hmp_migrate_runnable_task(target); @@ -7176,6 +7240,8 @@ static unsigned int hmp_idle_pull(int this_cpu) raw_spin_unlock_irqrestore(&target->lock, flags);
if (force) { + /* start timer to keep us awake */ + hmp_cpu_keepalive_trigger(); stop_one_cpu_nowait(cpu_of(target), hmp_active_task_migration_cpu_stop, target, &target->active_balance_work); @@ -7199,6 +7265,18 @@ static void run_rebalance_domains(struct softirq_action *h) enum cpu_idle_type idle = this_rq->idle_balance ? CPU_IDLE : CPU_NOT_IDLE;
+#ifdef CONFIG_SCHED_HMP + /* shortcut for hmp idle pull wakeups */ + if (unlikely(this_rq->wake_for_idle_pull)) { + this_rq->wake_for_idle_pull = 0; + if (hmp_idle_pull(this_cpu)) { + /* break out unless running nohz idle as well */ + if (idle != CPU_IDLE) + return; + } + } +#endif + hmp_force_up_migration(this_cpu);
rebalance_domains(this_cpu, idle); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 989c5ae..0d19ede 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -466,6 +466,7 @@ struct rq { struct cpu_stop_work active_balance_work; #ifdef CONFIG_SCHED_HMP struct task_struct *migrate_task; + int wake_for_idle_pull; #endif /* cpu of this runqueue: */ int cpu;
On Tue, 2014-03-18 at 11:29 +0000, Chris Redpath wrote:
When a normal forced up-migration takes place we stop the task to be migrated while the target CPU becomes available. This delay can range from 80us to 1500us on TC2 if the target CPU is in a deep idle state.
Instead, interrupt the target CPU and ask it to pull a task. This lets the current eligible task continue executing on the original CPU while the target CPU wakes. Use a pinned timer to prevent the pulling CPU going back into power-down with pending up-migrations.
If we trigger for a nohz kick, it does not matter that we also triggered for an idle pull: the idle_pull flag will still be set when we execute the softirq, so we will still do the idle pull.
If the target CPU is busy, we will not pull any tasks.
Signed-off-by: Chris Redpath chris.redpath@arm.com
kernel/sched/core.c | 12 +++++- kernel/sched/fair.c | 100 ++++++++++++++++++++++++++++++++++++++++++++------ kernel/sched/sched.h | 1 + 3 files changed, 101 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index de9d360..2a74474 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1407,7 +1407,11 @@ void scheduler_ipi(void) { if (llist_empty(&this_rq()->wake_list) && !tick_nohz_full_cpu(smp_processor_id())
&& !got_nohz_idle_kick())
&& !got_nohz_idle_kick()
+#ifdef CONFIG_SCHED_HMP
&& !this_rq()->wake_for_idle_pull
+#endif
+	)
 	return;
/* @@ -1434,6 +1438,12 @@ void scheduler_ipi(void) this_rq()->idle_balance = 1; raise_softirq_irqoff(SCHED_SOFTIRQ); } +#ifdef CONFIG_SCHED_HMP
+	else if (unlikely(this_rq()->wake_for_idle_pull)) {
+		raise_softirq_irqoff(SCHED_SOFTIRQ);
+	}
+#endif
+
 	irq_exit();
} diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4e3686b..b22906c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -39,6 +39,9 @@ */ #include <linux/cpufreq.h> #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ +#ifdef CONFIG_SCHED_HMP +#include <linux/cpuidle.h> +#endif #include "sched.h" @@ -3541,6 +3544,66 @@ static const int hmp_max_tasks = 5; extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list); +struct hmp_keepalive {
+	bool init;
+	ktime_t delay;
+	struct hrtimer timer;
+}; +DEFINE_PER_CPU(struct hmp_keepalive, hmp_cpu_keepalive);
+/* setup per-cpu keepalive timers */ +static enum hrtimer_restart hmp_cpu_keepalive_notify(struct hrtimer *hrtimer) +{
+	return HRTIMER_NORESTART;
+}
+static void hmp_keepalive_delay(unsigned int *ns_delay) +{
+	struct cpuidle_driver *drv;
+	drv = cpuidle_driver_ref();
Doesn't this mean that SCHED_HMP now needs to depend on CPU_IDLE? (In the KConfigs for both arm and arm64 arches.) Or, considering this is a tuning, erm... 'hack' I guess is the right word ;-) how about making this function an inline nop when CPU_IDLE is not defined?
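For example, something like this sketch (the real body staying as in the patch, only the guard added):

#ifdef CONFIG_CPU_IDLE
static void hmp_keepalive_delay(unsigned int *ns_delay)
{
	/* body as in the patch, using cpuidle_driver_ref() etc. */
}
#else
static inline void hmp_keepalive_delay(unsigned int *ns_delay)
{
	/* no cpuidle configured: leave the caller's default delay untouched */
}
#endif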
+	if (drv) {
unsigned int us_next = UINT_MAX;
unsigned int us_least = UINT_MAX;
unsigned int ns_next;
int idx;
for (idx = 0; idx < drv->state_count; idx++) {
if (drv->states[idx].target_residency < us_least) {
us_least = drv->states[idx].target_residency;
}
}
for (idx = 0; idx < drv->state_count; idx++) {
if (drv->states[idx].target_residency > us_least &&
drv->states[idx].target_residency < us_next) {
us_next = drv->states[idx].target_residency;
}
}
if (us_next)
You don't need the above check, shifting zero right one gives zero :-)
us_next>>=1;
ns_next = us_next << 10;
I know accuracy doesn't really matter, but what's wrong with *1000 rather than <<10? It's not like we couldn't afford the one-off cost of a clock cycle or two.
Also, what is the reasoning behind picking half the time of the second lowest residency time? And, that reasoning should probably be mentioned in the code as a comment.
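To put numbers on both points, here is a standalone sketch with invented residency values (not taken from any real cpuidle driver):

#include <stdio.h>
#include <limits.h>

int main(void)
{
	/* pretend target residencies, in microseconds */
	unsigned int residency[] = { 100, 500, 2000 };
	unsigned int n = sizeof(residency) / sizeof(residency[0]);
	unsigned int us_least = UINT_MAX, us_next = UINT_MAX, i;

	for (i = 0; i < n; i++)		/* shallowest state */
		if (residency[i] < us_least)
			us_least = residency[i];
	for (i = 0; i < n; i++)		/* second-shallowest state */
		if (residency[i] > us_least && residency[i] < us_next)
			us_next = residency[i];

	us_next >>= 1;			/* half of 500us = 250us */
	printf("<<10: %uns, *1000: %uns\n", us_next << 10, us_next * 1000);
	/* prints "<<10: 256000ns, *1000: 250000ns" */
	return 0;
}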
[...]
@@ -7199,6 +7265,18 @@ static void run_rebalance_domains(struct softirq_action *h) enum cpu_idle_type idle = this_rq->idle_balance ? CPU_IDLE : CPU_NOT_IDLE; +#ifdef CONFIG_SCHED_HMP
+	/* shortcut for hmp idle pull wakeups */
+	if (unlikely(this_rq->wake_for_idle_pull)) {
this_rq->wake_for_idle_pull = 0;
if (hmp_idle_pull(this_cpu)) {
/* break out unless running nohz idle as well */
if (idle != CPU_IDLE)
return;
}
+	}
+#endif
I don't understand the scheduler enough to work out if the above is OK. Could we miss out doing needed work if both idle pull and rebalance work were triggered close in time? I'll try and study the code some more.
hmp_force_up_migration(this_cpu); rebalance_domains(this_cpu, idle);
When looking for a task to be idle-pulled, don't consider tasks where the affinity does not allow that task to be placed on the target CPU. Also ensure that tasks with restricted affinity do not block selecting other unrestricted busy tasks.
Use the knowledge of target CPU more effectively in idle pull by passing to hmp_get_heaviest_task when we know it, otherwise only checking for general affinity matches with any of the CPUs in the bigger HMP domain.
We still need to explicitly check affinity is allowed in idle pull since if we find no match in hmp_get_heaviest_task we will return the current one, which may not be affine to the new CPU despite having high enough load. In this case, there is nothing to move.
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
---
 kernel/sched/fair.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b22906c..431c3c9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3679,30 +3679,30 @@ static inline struct hmp_domain *hmp_faster_domain(int cpu);
/* must hold runqueue lock for queue se is currently on */ static struct sched_entity *hmp_get_heaviest_task( - struct sched_entity *se, int migrate_up) + struct sched_entity *se, int target_cpu) { int num_tasks = hmp_max_tasks; struct sched_entity *max_se = se; unsigned long int max_ratio = se->avg.load_avg_ratio; const struct cpumask *hmp_target_mask = NULL; + struct hmp_domain *hmp;
- if (migrate_up) { - struct hmp_domain *hmp; - if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq))) - return max_se; + if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq))) + return max_se; + + hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq)); + hmp_target_mask = &hmp->cpus;
- hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq)); - hmp_target_mask = &hmp->cpus; - } /* The currently running task is not on the runqueue */ se = __pick_first_entity(cfs_rq_of(se));
while (num_tasks && se) { if (entity_is_task(se) && - (se->avg.load_avg_ratio > max_ratio && - hmp_target_mask && - cpumask_intersects(hmp_target_mask, - tsk_cpus_allowed(task_of(se))))) { + se->avg.load_avg_ratio > max_ratio && + cpumask_intersects(hmp_target_mask, + tsk_cpus_allowed(task_of(se))) && + target_cpu > -1 ? cpumask_test_cpu(target_cpu, + tsk_cpus_allowed(task_of(se))) : 1) { max_se = se; max_ratio = se->avg.load_avg_ratio; } @@ -7103,7 +7103,7 @@ static void hmp_force_up_migration(int this_cpu) } } orig = curr; - curr = hmp_get_heaviest_task(curr, 1); + curr = hmp_get_heaviest_task(curr, -1); p = task_of(curr); if (hmp_up_migration(cpu, &target_cpu, curr)) { cpu_rq(target_cpu)->wake_for_idle_pull = 1; @@ -7200,12 +7200,14 @@ static unsigned int hmp_idle_pull(int this_cpu) } } orig = curr; - curr = hmp_get_heaviest_task(curr, 1); + curr = hmp_get_heaviest_task(curr, this_cpu); /* check if heaviest eligible task on this * CPU is heavier than previous task */ if (hmp_task_eligible_for_up_migration(curr) && - curr->avg.load_avg_ratio > ratio) { + curr->avg.load_avg_ratio > ratio && + cpumask_test_cpu(this_cpu, + tsk_cpus_allowed(task_of(curr)))) { p = task_of(curr); target = rq; ratio = curr->avg.load_avg_ratio;
On Tue, 2014-03-18 at 11:29 +0000, Chris Redpath wrote:
When looking for a task to be idle-pulled, don't consider tasks where the affinity does not allow that task to be placed on the target CPU. Also ensure that tasks with restricted affinity do not block selecting other unrestricted busy tasks.
Use the knowledge of target CPU more effectively in idle pull by passing to hmp_get_heaviest_task when we know it, otherwise only checking for general affinity matches with any of the CPUs in the bigger HMP domain.
We still need to explicitly check affinity is allowed in idle pull since if we find no match in hmp_get_heaviest_task we will return the current one, which may not be affine to the new CPU despite having high enough load. In this case, there is nothing to move.
Signed-off-by: Chris Redpath chris.redpath@arm.com
kernel/sched/fair.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b22906c..431c3c9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3679,30 +3679,30 @@ static inline struct hmp_domain *hmp_faster_domain(int cpu); /* must hold runqueue lock for queue se is currently on */ static struct sched_entity *hmp_get_heaviest_task(
struct sched_entity *se, int migrate_up)
struct sched_entity *se, int target_cpu)
{ int num_tasks = hmp_max_tasks; struct sched_entity *max_se = se; unsigned long int max_ratio = se->avg.load_avg_ratio; const struct cpumask *hmp_target_mask = NULL;
- struct hmp_domain *hmp;
- if (migrate_up) {
struct hmp_domain *hmp;
if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq)))
return max_se;
- if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq)))
return max_se;
- hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
- hmp_target_mask = &hmp->cpus;
hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
hmp_target_mask = &hmp->cpus;
- } /* The currently running task is not on the runqueue */ se = __pick_first_entity(cfs_rq_of(se));
while (num_tasks && se) { if (entity_is_task(se) &&
(se->avg.load_avg_ratio > max_ratio &&
hmp_target_mask &&
cpumask_intersects(hmp_target_mask,
tsk_cpus_allowed(task_of(se))))) {
se->avg.load_avg_ratio > max_ratio &&
cpumask_intersects(hmp_target_mask,
tsk_cpus_allowed(task_of(se))) &&
target_cpu > -1 ? cpumask_test_cpu(target_cpu,
tsk_cpus_allowed(task_of(se))) : 1) {
Instead of adding those two rather messy lines above, could we not instead just change the earlier setup of hmp_target_mask to only be the CPUs we want? E.g.
	if (target_cpu >= 0) {
		hmp_target_mask = cpumask_of(target_cpu);
	} else {
		hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
		hmp_target_mask = &hmp->cpus;
	}
Or, if you want to do some sanity checking that target_cpu is in the right domain...
	hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
	hmp_target_mask = &hmp->cpus;
	if (target_cpu >= 0) {
		BUG_ON(!cpumask_test_cpu(target_cpu, hmp_target_mask));
		hmp_target_mask = cpumask_of(target_cpu);
	}
On 18/03/14 12:48, Jon Medhurst (Tixy) wrote:
On Tue, 2014-03-18 at 11:29 +0000, Chris Redpath wrote:
When looking for a task to be idle-pulled, don't consider tasks where the affinity does not allow that task to be placed on the target CPU. Also ensure that tasks with restricted affinity do not block selecting other unrestricted busy tasks.
Use the knowledge of target CPU more effectively in idle pull by passing to hmp_get_heaviest_task when we know it, otherwise only checking for general affinity matches with any of the CPUs in the bigger HMP domain.
We still need to explicitly check affinity is allowed in idle pull since if we find no match in hmp_get_heaviest_task we will return the current one, which may not be affine to the new CPU despite having high enough load. In this case, there is nothing to move.
Signed-off-by: Chris Redpath chris.redpath@arm.com
kernel/sched/fair.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-)
[...]
Instead of adding those two rather messy lines above, could we not instead just change the earlier setup of hmp_target_mask to only be the CPUs we want? E.g.
	if (target_cpu >= 0) {
		hmp_target_mask = cpumask_of(target_cpu);
	} else {
		hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
		hmp_target_mask = &hmp->cpus;
	}
Or, if you want to do some sanity checking that target_cpu is in the right domain...
	hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
	hmp_target_mask = &hmp->cpus;
	if (target_cpu >= 0) {
		BUG_ON(!cpumask_test_cpu(target_cpu, hmp_target_mask));
		hmp_target_mask = cpumask_of(target_cpu);
	}
I much prefer the second option, thanks :)
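So hmp_get_heaviest_task would end up starting roughly like this (just a sketch with your second suggestion folded in, untested):

	hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq));
	hmp_target_mask = &hmp->cpus;
	/* idle pull passes the destination CPU; force-up passes -1 */
	if (target_cpu >= 0) {
		/* the target CPU must be in the faster domain */
		BUG_ON(!cpumask_test_cpu(target_cpu, hmp_target_mask));
		hmp_target_mask = cpumask_of(target_cpu);
	}
	/* the scan below can then drop the target_cpu ternary and rely
	 * purely on cpumask_intersects(hmp_target_mask, ...) */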