Hi Ingo/Thomas,
This is V2 of the cleanups around timer-core initialization sent earlier. These make initialization of tvec_base's simpler by statically allocating memory for them, and removing the need of initializing them again on CPU hotplug.
V1->V2: - Dropped 2/3 from earlier set, which moved definition of __tvec_bases within a function, as that caused wreckage on xtensa and tile. - A new patch from Peter is added, 3/3. - Few changes in 1/3 on Ingo's suggestions: - Add explanatory comment around boot_tvec_bases and __tvec_bases. - s/boot_done/boot_cpu_skipped
-- viresh
Peter Zijlstra (2): timer: Allocate per-cpu tvec_base's statically timer: Further simplify SMP and HOTPLUG logic
Viresh Kumar (1): timer: Don't initialize tvec_base on hotplug
kernel/time/timer.c | 143 ++++++++++++++++++++++------------------------------ 1 file changed, 61 insertions(+), 82 deletions(-)
From: Peter Zijlstra peterz@infradead.org
Memory for tvec_base is allocated separately for boot CPU (statically) and non-boot CPUs (dynamically).
The reason is because __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've made NULL special, hint: lock_timer_base()) and we cannot get a compile time pointer to per-cpu entries because we don't know where we'll map the section, even for the boot cpu.
This can be simplified a bit by statically allocating per-cpu memory. The only disadvantage is that memory for one of the structures will stay unused, i.e. for the boot CPU, which uses boot_tvec_bases.
This will also guarantee that tvec_base is cacheline aligned. Even though tvec_base has ____cacheline_aligned stuck on, kzalloc_node() does not actually respect that (but guarantees a minimum u64 alignment).
Cc: Thomas Gleixner tglx@linutronix.de Cc: linaro-kernel@lists.linaro.org Cc: Ingo Molnar mingo@redhat.com Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org --- kernel/time/timer.c | 48 +++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 29 deletions(-)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2d3f5c504939..f3cc653f876c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -90,8 +90,19 @@ struct tvec_base { struct tvec tv5; } ____cacheline_aligned;
+/* + * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've + * made NULL special, hint: lock_timer_base()) and we cannot get a compile time + * pointer to per-cpu entries because we don't know where we'll map the section, + * even for the boot cpu. + * + * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the + * rest of them. + */ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); +static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); + static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
/* Functions below help us manage 'deferrable' flag */ @@ -1534,46 +1545,25 @@ EXPORT_SYMBOL(schedule_timeout_uninterruptible);
static int init_timers_cpu(int cpu) { - int j; - struct tvec_base *base; + struct tvec_base *base = per_cpu(tvec_bases, cpu); static char tvec_base_done[NR_CPUS]; + int j;
if (!tvec_base_done[cpu]) { - static char boot_done; + static char boot_cpu_skipped;
- if (boot_done) { - /* - * The APs use this path later in boot - */ - base = kzalloc_node(sizeof(*base), GFP_KERNEL, - cpu_to_node(cpu)); - if (!base) - return -ENOMEM; - - /* Make sure tvec_base has TIMER_FLAG_MASK bits free */ - if (WARN_ON(base != tbase_get_base(base))) { - kfree(base); - return -ENOMEM; - } - per_cpu(tvec_bases, cpu) = base; + if (!boot_cpu_skipped) { + boot_cpu_skipped = 1; /* skip the boot cpu */ } else { - /* - * This is for the boot CPU - we use compile-time - * static initialisation because per-cpu memory isn't - * ready yet and because the memory allocators are not - * initialised either. - */ - boot_done = 1; - base = &boot_tvec_bases; + base = per_cpu_ptr(&__tvec_bases, cpu); + per_cpu(tvec_bases, cpu) = base; } + spin_lock_init(&base->lock); tvec_base_done[cpu] = 1; base->cpu = cpu; - } else { - base = per_cpu(tvec_bases, cpu); }
- for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); INIT_LIST_HEAD(base->tv4.vec + j);
There is no need to call init_timers_cpu() on every cpu hotplug event, there is not much we need to reset.
- Timer-lists are already empty at the end of migrate_timers(). - timer_jiffies will be refreshed while adding a new timer, after the CPU is online again. - active_timers and all_timers can be reset from migrate_timers().
Cc: Ingo Molnar mingo@redhat.com Cc: Thomas Gleixner tglx@linutronix.de Cc: linaro-kernel@lists.linaro.org Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org --- kernel/time/timer.c | 98 +++++++++++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 55 deletions(-)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f3cc653f876c..1feb9c7035c0 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1543,43 +1543,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-static int init_timers_cpu(int cpu) -{ - struct tvec_base *base = per_cpu(tvec_bases, cpu); - static char tvec_base_done[NR_CPUS]; - int j; - - if (!tvec_base_done[cpu]) { - static char boot_cpu_skipped; - - if (!boot_cpu_skipped) { - boot_cpu_skipped = 1; /* skip the boot cpu */ - } else { - base = per_cpu_ptr(&__tvec_bases, cpu); - per_cpu(tvec_bases, cpu) = base; - } - - spin_lock_init(&base->lock); - tvec_base_done[cpu] = 1; - base->cpu = cpu; - } - - for (j = 0; j < TVN_SIZE; j++) { - INIT_LIST_HEAD(base->tv5.vec + j); - INIT_LIST_HEAD(base->tv4.vec + j); - INIT_LIST_HEAD(base->tv3.vec + j); - INIT_LIST_HEAD(base->tv2.vec + j); - } - for (j = 0; j < TVR_SIZE; j++) - INIT_LIST_HEAD(base->tv1.vec + j); - - base->timer_jiffies = jiffies; - base->next_timer = base->timer_jiffies; - base->active_timers = 0; - base->all_timers = 0; - return 0; -} - #ifdef CONFIG_HOTPLUG_CPU static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) { @@ -1621,6 +1584,9 @@ static void migrate_timers(int cpu) migrate_timer_list(new_base, old_base->tv5.vec + i); }
+ old_base->active_timers = 0; + old_base->all_timers = 0; + spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); put_cpu_var(tvec_bases); @@ -1630,25 +1596,16 @@ static void migrate_timers(int cpu) static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - long cpu = (long)hcpu; - int err; - - switch(action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - err = init_timers_cpu(cpu); - if (err < 0) - return notifier_from_errno(err); - break; #ifdef CONFIG_HOTPLUG_CPU + switch (action) { case CPU_DEAD: case CPU_DEAD_FROZEN: - migrate_timers(cpu); + migrate_timers((long)hcpu); break; -#endif default: break; } +#endif return NOTIFY_OK; }
@@ -1656,18 +1613,49 @@ static struct notifier_block timers_nb = { .notifier_call = timer_cpu_notify, };
+static void __init init_timer_cpu(struct tvec_base *base, int cpu) +{ + int j;
-void __init init_timers(void) + base->cpu = cpu; + per_cpu(tvec_bases, cpu) = base; + spin_lock_init(&base->lock); + + for (j = 0; j < TVN_SIZE; j++) { + INIT_LIST_HEAD(base->tv5.vec + j); + INIT_LIST_HEAD(base->tv4.vec + j); + INIT_LIST_HEAD(base->tv3.vec + j); + INIT_LIST_HEAD(base->tv2.vec + j); + } + for (j = 0; j < TVR_SIZE; j++) + INIT_LIST_HEAD(base->tv1.vec + j); + + base->timer_jiffies = jiffies; + base->next_timer = base->timer_jiffies; +} + +static void __init init_timer_cpus(void) { - int err; + struct tvec_base *base; + int local_cpu = smp_processor_id(); + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == local_cpu) + base = &boot_tvec_bases; + else + base = per_cpu_ptr(&__tvec_bases, cpu); + + init_timer_cpu(base, cpu); + } +}
+void __init init_timers(void) +{ /* ensure there are enough low bits for flags in timer->base pointer */ BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
- err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - BUG_ON(err != NOTIFY_OK); - + init_timer_cpus(); init_timer_stats(); register_cpu_notifier(&timers_nb); open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
From: Peter Zijlstra peterz@infradead.org
Remove one CONFIG_HOTPLUG_CPU #ifdef in trade for introducing one CONFIG_SMP #ifdef.
The CONFIG_SMP ifdef avoids declaring the per-cpu __tvec_bases storage on UP systems since they already have boot_tvec_bases.
Also (re)add a runtime check on the base alignment -- for the paranoid amongst us :-)
Cc: Thomas Gleixner tglx@linutronix.de Cc: linaro-kernel@lists.linaro.org Cc: Ingo Molnar mingo@redhat.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Viresh Kumar viresh.kumar@linaro.org --- kernel/time/timer.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 1feb9c7035c0..aa03ac8c8d98 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -101,7 +101,9 @@ struct tvec_base { */ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); +#ifdef CONFIG_SMP static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); +#endif
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
@@ -1591,12 +1593,10 @@ static void migrate_timers(int cpu) spin_unlock_irq(&new_base->lock); put_cpu_var(tvec_bases); } -#endif /* CONFIG_HOTPLUG_CPU */
static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { -#ifdef CONFIG_HOTPLUG_CPU switch (action) { case CPU_DEAD: case CPU_DEAD_FROZEN: @@ -1605,18 +1605,17 @@ static int timer_cpu_notify(struct notifier_block *self, default: break; } -#endif + return NOTIFY_OK; } - -static struct notifier_block timers_nb = { - .notifier_call = timer_cpu_notify, -}; +#endif /* CONFIG_HOTPLUG_CPU */
static void __init init_timer_cpu(struct tvec_base *base, int cpu) { int j;
+ BUG_ON(base != tbase_get_base(base)); + base->cpu = cpu; per_cpu(tvec_bases, cpu) = base; spin_lock_init(&base->lock); @@ -1643,8 +1642,10 @@ static void __init init_timer_cpus(void) for_each_possible_cpu(cpu) { if (cpu == local_cpu) base = &boot_tvec_bases; +#ifdef CONFIG_SMP else base = per_cpu_ptr(&__tvec_bases, cpu); +#endif
init_timer_cpu(base, cpu); } @@ -1657,7 +1658,7 @@ void __init init_timers(void)
init_timer_cpus(); init_timer_stats(); - register_cpu_notifier(&timers_nb); + cpu_notifier(timer_cpu_notify, 0); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); }
linaro-kernel@lists.linaro.org