Hi Vincent,
As promised in our last 'technical sync-up meeting', here is some feedback on your patch. The version of the patch is from March this year, so a lot of stuff has changed in the meantime, but I hope this feedback is still valuable.
You might already have addressed some of the issues in your current rebase of the patch.
The overall idea seems to be to piggyback NOHZ_STATS_KICK onto the NOHZ_BALANCE_KICK machinery so that the back-end (SCHED_SOFTIRQ) can make a distinction between the need for a nohz-stats-update and a nohz-balance.
-- Dietmar
On 14/03/16 09:55, Vincent Guittot wrote:
Conflicts: kernel/sched/fair.c
Hi Morten,
I have finally been able to fix my connection issue. This patch uses the update_blocked_averages mechanism that is present in the ILB to ensure that the blocked load will be updated often enough to stay meaningful. There is still some part that should be fixed, like the fixed 5-tick delay after the next update to trigger an update.
Vincent
kernel/sched/fair.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++---- kernel/sched/sched.h | 1 + 2 files changed, 61 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d2d0df4..a716299 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5108,6 +5108,9 @@ static int get_cpu_usage(int cpu) return (usage * capacity) >> SCHED_LOAD_SHIFT; }
+static inline bool nohz_stat_kick_needed(int cpu); +static void nohz_balancer_kick(bool only_update);
/*
- select_task_rq_fair: Select target runqueue for the waking task in domains
- that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -5167,6 +5170,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */ new_cpu = select_idle_sibling(p, new_cpu);
+#ifdef CONFIG_NO_HZ_COMMON
if (nohz_stat_kick_needed(new_cpu))
nohz_balancer_kick(true);
+#endif
Why do you ask the update thing only in the select idle sibling path?
} else while (sd) { struct sched_group *group; int weight; @@ -7313,6 +7320,12 @@ static int load_balance(int this_cpu, struct rq *this_rq, }
group = find_busiest_group(&env);
- if (test_and_clear_bit(NOHZ_STATS_KICK, nohz_flags(this_cpu))) {
ld_moved = 0;
goto out;
- }
- if (!group) { schedstat_inc(sd, lb_nobusyg[idle]); goto out_balanced;
@@ -7585,8 +7598,9 @@ static int idle_balance(struct rq *this_rq) */ this_rq->idle_stamp = rq_clock(this_rq);
- if (this_rq->avg_idle < sysctl_sched_migration_cost ||
!this_rq->rd->overload) {
- if (!test_bit(NOHZ_STATS_KICK, nohz_flags(this_cpu)) &&
(this_rq->avg_idle < sysctl_sched_migration_cost ||
!this_rq->rd->overload)) {
In case 'NOHZ_STATS_KICK' is set you want to call the update_blocked_averages(this_cpu) below but do you also want to do the actual idle load balancing?
rcu_read_lock(); sd = rcu_dereference_check_sched_domain(this_rq->sd); if (sd)
@@ -7639,6 +7653,8 @@ static int idle_balance(struct rq *this_rq)
raw_spin_lock(&this_rq->lock);
- clear_bit(NOHZ_STATS_KICK, nohz_flags(this_cpu));
- if (curr_cost > this_rq->max_idle_balance_cost) this_rq->max_idle_balance_cost = curr_cost;
@@ -7776,6 +7792,9 @@ static inline int find_new_ilb(void) ilb = cpumask_first_and(sched_domain_span(sd), nohz.idle_cpus_mask);
Didn't compile for me. I can't find an sd in find_new_ilb() in mainline. Did you add it in a previous patch?
if (ilb == smp_processor_id())
ilb = cpumask_next_and(ilb, sched_domain_span(sd),
}nohz.idle_cpus_mask); if (ilb < nr_cpu_ids) break;
@@ -7793,7 +7812,7 @@ static inline int find_new_ilb(void)
- nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
- CPU (if there is one).
*/ -static void nohz_balancer_kick(void) +static void nohz_balancer_kick(bool only_update) { int ilb_cpu;
@@ -7806,6 +7825,9 @@ static void nohz_balancer_kick(void)
if (test_and_set_bit(NOHZ_BALANCE_KICK, nohz_flags(ilb_cpu))) return;
- if(only_update)
set_bit(NOHZ_STATS_KICK, nohz_flags(ilb_cpu));
- Use smp_send_reschedule() instead of resched_cpu().
- This way we generate a sched IPI on the target cpu which
@@ -8000,6 +8022,8 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) } rcu_read_unlock();
- /* clear any pending stats update request */
- clear_bit(NOHZ_STATS_KICK, nohz_flags(cpu)); /*
- next_balance will be updated only when there is a need.
- When the cpu is attached to null domain for ex, it will not be
@@ -8019,11 +8043,14 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) int this_cpu = this_rq->cpu; struct rq *rq; int balance_cpu;
int update_stats_only = 0;
if (idle != CPU_IDLE || !test_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu))) goto end;
if (test_bit(NOHZ_STATS_KICK, nohz_flags(this_cpu)))
update_stats_only = 1;
for_each_cpu(balance_cpu, nohz.idle_cpus_mask) { if (balance_cpu == this_cpu || !idle_cpu(balance_cpu)) continue;
@@ -8043,6 +8070,11 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) * do the balance. */ if (time_after_eq(jiffies, rq->next_balance)) {
/* only stats update is required */
if (update_stats_only)
set_bit(NOHZ_STATS_KICK, nohz_flags(balance_cpu));
Why don't you call update_blocked_averages(balance_cpu) here and skip the rebalance_domains() call in case of update_stats_only = 1 (i.e. in case NOHZ_STATS_KICK was set on this_cpu)? I assume here that when NOHZ_STATS_KICK is set we really only want to do the update and no actual load balancing.
raw_spin_lock_irq(&rq->lock); update_rq_clock(rq); update_idle_cpu_load(rq);
@@ -8137,8 +8169,32 @@ static inline bool nohz_kick_needed(struct rq *rq) rcu_read_unlock(); return kick; }
+static inline bool nohz_stat_kick_needed(int cpu) +{
- unsigned long now = jiffies;
You don't bail here if rq->idle_balance is set like nohz_kick_needed() does?
- /*
* None are in tickless mode and hence no need for NOHZ idle load
* balancing.
*/
- if (likely(!atomic_read(&nohz.nr_cpus)))
return false;
- if (time_before(now, nohz.next_balance+5))
return false;
- /* ensure that this cpu statistics will be updated */
- set_bit(NOHZ_STATS_KICK, nohz_flags(cpu));
- return true;
+}
#else static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { } +static inline bool nohz_stat_kick_needed(int cpu) { return false; } #endif
/* @@ -8176,7 +8232,7 @@ void trigger_load_balance(struct rq *rq) raise_softirq(SCHED_SOFTIRQ); #ifdef CONFIG_NO_HZ_COMMON if (nohz_kick_needed(rq))
nohz_balancer_kick();
nohz_balancer_kick(false);
#endif }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 676be22c..9cf53df 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1716,6 +1716,7 @@ extern void cfs_bandwidth_usage_dec(void); enum rq_nohz_flag_bits { NOHZ_TICK_STOPPED, NOHZ_BALANCE_KICK,
- NOHZ_STATS_KICK,
};
#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)