On Thu, 2014-03-13 at 17:19 +0000, Chris Redpath wrote:
When a normal forced up-migration takes place, we stop the task to be migrated while the target CPU becomes available. This delay can range from 80us to 1500us on TC2 if the target CPU is in a deep idle state.
Instead, interrupt the target CPU and ask it to pull a task. This lets the current eligible task continue executing on the original CPU while the target CPU wakes. Use a pinned timer to prevent the pulling CPU from going back into power-down while an up-migration is still pending.
If the target CPU is busy, we will not pull any tasks.
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
 kernel/sched/core.c  | 18 +++++++++++-
 kernel/sched/fair.c  | 74 ++++++++++++++++++++++++++++++++++++++++++--------
 kernel/sched/sched.h |  1 +
 3 files changed, 81 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index de9d360..89e49e1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1407,7 +1407,11 @@ void scheduler_ipi(void)
 {
        if (llist_empty(&this_rq()->wake_list)
                        && !tick_nohz_full_cpu(smp_processor_id())
-                       && !got_nohz_idle_kick())
+                       && !got_nohz_idle_kick()
+#ifdef CONFIG_SCHED_HMP
+                       && !this_rq()->wake_for_idle_pull
+#endif
+                       )
                return;
        /*
@@ -1430,10 +1434,22 @@ void scheduler_ipi(void)
        /*
         * Check if someone kicked us for doing the nohz idle load balance.
         */
+#ifdef CONFIG_SCHED_HMP
+       {
+               bool nohz = got_nohz_idle_kick();
+               if (unlikely(this_rq()->wake_for_idle_pull) ||
+                               unlikely(nohz)) {
+                       if (unlikely(nohz))
+                               this_rq()->idle_balance = 1;
+                       raise_softirq_irqoff(SCHED_SOFTIRQ);
+               }
+       }
+#else
        if (unlikely(got_nohz_idle_kick())) {
                this_rq()->idle_balance = 1;
                raise_softirq_irqoff(SCHED_SOFTIRQ);
        }
+#endif
Wouldn't the above change have been much simpler as:
        if (unlikely(got_nohz_idle_kick())) {
                this_rq()->idle_balance = 1;
                raise_softirq_irqoff(SCHED_SOFTIRQ);
        }
+#ifdef CONFIG_SCHED_HMP
+       else if (unlikely(this_rq()->wake_for_idle_pull))
+               raise_softirq_irqoff(SCHED_SOFTIRQ);
+#endif
That would also avoid associating the comment about checking for the 'nohz idle load balance' with the HMP code, which also does the idle pull check.
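For clarity, the end of scheduler_ipi() would then read roughly as follows (untested, just the suggestion above applied to the quoted code):

        if (unlikely(got_nohz_idle_kick())) {
                this_rq()->idle_balance = 1;
                raise_softirq_irqoff(SCHED_SOFTIRQ);
        }
#ifdef CONFIG_SCHED_HMP
        else if (unlikely(this_rq()->wake_for_idle_pull))
                raise_softirq_irqoff(SCHED_SOFTIRQ);
#endif
        irq_exit();

which keeps the nohz comment attached to the code it actually describes.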
        irq_exit();
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a5cae4a..d59e0de 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3541,6 +3541,43 @@ static const int hmp_max_tasks = 5;
 
 extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list);
 
+struct hmp_keepalive {
+       bool init;
+       ktime_t delay;
+       struct hrtimer timer;
+};
+DEFINE_PER_CPU(struct hmp_keepalive, hmp_cpu_keepalive);
+
+/* setup per-cpu keepalive timers */
+static enum hrtimer_restart hmp_cpu_keepalive_notify(struct hrtimer *hrtimer)
+{
+       return HRTIMER_NORESTART;
+}
+
+static inline void hmp_cpu_keepalive_init(struct hmp_keepalive *keepalive, int cpu)
+{
+       struct hrtimer *hrtimer;
+       unsigned int ns_delay = 1000000;
Is there any significance to a 1ms delay? Should the value be related to the scheduler tick, or to idle wakeup latency, or to anything else that needs to differ between platforms?
If it's idle-related, I notice this figure is the same as the target_residency for the TC2 cpuidle driver. In the past, when timers have been set to exactly this period, whether a given kernel build decides it has enough time to go idle has sat on a knife edge, which can give dramatically different behaviour from build to build and week to week.
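If the intention is to keep the CPU out of the deeper idle states while a pull is pending, one (untested) way to tie the number to the platform rather than hard-coding it might be to size it off the cpuidle driver's residency figures at keepalive-init time. hmp_keepalive_delay_ns() below is a name I've made up, and it assumes CONFIG_CPU_IDLE, target_residency reported in microseconds, and state 0 being WFI:

#include <linux/kernel.h>
#include <linux/cpuidle.h>

/* Hypothetical helper: pick a keepalive period comfortably below the
 * shortest target_residency of any state deeper than WFI, so the governor
 * should never see enough predicted idle time to power the CPU down while
 * the keepalive is armed. Falls back to the current 1ms if no driver.
 */
static unsigned int hmp_keepalive_delay_ns(void)
{
        struct cpuidle_driver *drv = cpuidle_get_driver();
        unsigned int min_res_us = UINT_MAX;
        int i;

        if (drv) {
                /* shortest residency of any state deeper than state 0 (WFI) */
                for (i = 1; i < drv->state_count; i++)
                        if (drv->states[i].target_residency < min_res_us)
                                min_res_us = drv->states[i].target_residency;
        }
        if (min_res_us == UINT_MAX)
                return 1000000;         /* no deep states known: keep 1ms */

        /* stay well below the residency rather than sitting right on it */
        return (min_res_us / 2) * 1000; /* us -> ns */
}

That at least moves the knife edge away from the exact residency value, though whether the delay should track residency at all depends on what the 1ms is really meant to represent.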
+
+       if (keepalive->init)
+               return;
+
+       /* make sure we only init for the right CPU */
+       WARN_ON(keepalive != &per_cpu(hmp_cpu_keepalive, smp_processor_id()));
To be nit-picky, this WARN seems a little redundant given that the only caller (below) passes in exactly the parameter the function checks for; and I can't help wondering whether this init function couldn't just be part of hmp_cpu_keepalive_trigger(). None of this is important enough to warrant changing the code, though.
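For what it's worth, folding the init into the trigger might look something like this (untested sketch, reusing the fields and names from the patch):

static void hmp_cpu_keepalive_trigger(void)
{
        int cpu = smp_processor_id();
        struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);

        if (!keepalive->init) {
                hrtimer_init(&keepalive->timer, CLOCK_MONOTONIC,
                             HRTIMER_MODE_REL_PINNED);
                keepalive->timer.function = hmp_cpu_keepalive_notify;
                keepalive->delay = ns_to_ktime(1000000);        /* 1ms, as in the patch */
                keepalive->init = true;
        }
        hrtimer_start(&keepalive->timer, keepalive->delay,
                      HRTIMER_MODE_REL_PINNED);
}

and the WARN_ON then disappears naturally, since the per-cpu lookup already uses smp_processor_id().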
+
+       hrtimer = &keepalive->timer;
+       hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+       hrtimer->function = hmp_cpu_keepalive_notify;
+       keepalive->delay = ns_to_ktime(ns_delay);
+       keepalive->init = true;
+}
+
+static void hmp_cpu_keepalive_trigger(void)
+{
+       int cpu = smp_processor_id();
+       struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
+
+       hmp_cpu_keepalive_init(keepalive, cpu);
+       hrtimer_start(&keepalive->timer, keepalive->delay, HRTIMER_MODE_REL_PINNED);
+}
 /* Setup hmp_domains */
 static int __init hmp_cpu_mask_setup(void)
 {
@@ -3598,9 +3635,13 @@ static void hmp_online_cpu(int cpu)
 static void hmp_offline_cpu(int cpu)
 {
        struct hmp_domain *domain = hmp_get_hmp_domain_for_cpu(cpu);
+       struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu);
 
        if(domain)
                cpumask_clear_cpu(cpu, &domain->cpus);
+
+       if (keepalive->init)
+               hrtimer_cancel(&keepalive->timer);
 }
 /*
  * Needed to determine heaviest tasks etc.
@@ -7021,7 +7062,7 @@ static void hmp_force_up_migration(int this_cpu)
                target = cpu_rq(cpu);
                raw_spin_lock_irqsave(&target->lock, flags);
                curr = target->cfs.curr;
-               if (!curr) {
+               if (!curr || target->active_balance) {
                        raw_spin_unlock_irqrestore(&target->lock, flags);
                        continue;
                }
@@ -7038,16 +7079,13 @@ static void hmp_force_up_migration(int this_cpu)
                curr = hmp_get_heaviest_task(curr, 1);
                p = task_of(curr);
                if (hmp_up_migration(cpu, &target_cpu, curr)) {
-                       if (!target->active_balance) {
-                               get_task_struct(p);
-                               target->push_cpu = target_cpu;
-                               target->migrate_task = p;
-                               got_target = 1;
-                               trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE);
-                               hmp_next_up_delay(&p->se, target->push_cpu);
-                       }
+                       cpu_rq(target_cpu)->wake_for_idle_pull = 1;
+                       raw_spin_unlock_irqrestore(&target->lock, flags);
+                       spin_unlock(&hmp_force_migration);
+                       smp_send_reschedule(target_cpu);
+                       return;
If I've followed the code correctly, does this change now mean that a CPU stops considering up-migrations after it has found this one, whereas before it would continue to look for others as well? Do we still want to do that? I guess it might be too complicated now that we're letting the other CPU pull things in its own time. Or, if we were in the situation where two tasks needed up-migrating, would the new pull method actually find both of them to pull anyway, so the behaviour ends up the same as before?
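For what it's worth, if we did still want to scan the remaining CPUs, I imagine it would look roughly like this (untested; it relies on hmp_force_migration staying held until the existing unlock at the end of the function):

                if (hmp_up_migration(cpu, &target_cpu, curr)) {
                        cpu_rq(target_cpu)->wake_for_idle_pull = 1;
                        raw_spin_unlock_irqrestore(&target->lock, flags);
                        smp_send_reschedule(target_cpu);
                        continue;       /* keep checking the other CPUs */
                }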
                }
-               if (!got_target && !target->active_balance) {
+               if (!got_target) {
                        /*
                         * For now we just check the currently running task.
                         * Selecting the lightest task for offloading will
@@ -7069,7 +7107,7 @@ static void hmp_force_up_migration(int this_cpu)
                 * is not currently running move it, otherwise let the
                 * CPU stopper take care of it.
                 */
-               if (got_target && !target->active_balance) {
+               if (got_target) {
                        if (!task_running(target, p)) {
                                trace_sched_hmp_migrate_force_running(p, 0);
                                hmp_migrate_runnable_task(target);
@@ -7175,6 +7213,8 @@ static unsigned int hmp_idle_pull(int this_cpu)
        raw_spin_unlock_irqrestore(&target->lock, flags);
 
        if (force) {
+               /* start timer to keep us awake */
+               hmp_cpu_keepalive_trigger();
                stop_one_cpu_nowait(cpu_of(target),
                        hmp_active_task_migration_cpu_stop,
                        target, &target->active_balance_work);
@@ -7198,6 +7238,18 @@ static void run_rebalance_domains(struct softirq_action *h)
        enum cpu_idle_type idle = this_rq->idle_balance ?
                                                CPU_IDLE : CPU_NOT_IDLE;
 
+#ifdef CONFIG_SCHED_HMP
+       /* shortcut for hmp idle pull wakeups */
+       if (unlikely(this_rq->wake_for_idle_pull)) {
+               this_rq->wake_for_idle_pull = 0;
+               if (hmp_idle_pull(this_cpu)) {
+                       /* break out unless running nohz idle as well */
+                       if (idle != CPU_IDLE)
+                               return;
+               }
+       }
+#endif
+
        hmp_force_up_migration(this_cpu);
 
        rebalance_domains(this_cpu, idle);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 989c5ae..0d19ede 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -466,6 +466,7 @@ struct rq {
        struct cpu_stop_work active_balance_work;
 #ifdef CONFIG_SCHED_HMP
        struct task_struct *migrate_task;
+       int wake_for_idle_pull;
 #endif
        /* cpu of this runqueue: */
        int cpu;