Hi Thomas,
I haven't tested it much till now. I am sending this patch just to check if the initial idea looks fine to you guys or not.
Till now, we weren't migrating a running timer because, with migration, del_timer_sync() can't detect that the timer's handler has not yet finished.
Now, when can we actually reach the code (inside __mod_timer()) where
base->running_timer == timer? i.e. where we are trying to migrate the currently running timer.
I can see only following case: - Timer re-armed itself. i.e. Currently we are running interrupt handler of a timer and it rearmed itself from there. At this time user might have called del_timer_sync() or not. If not, then there is no harm in re-arming the timer?
Now, when somebody tries to delete a timer, obviously he doesn't want to run it any more for now. So, why should we ever re-arm a timer, which is scheduled for deletion?
This patch tries to fix "migration of running timer", assuming above theory is correct :)
So, now when we get a call to del_timer_sync(), we will mark it scheduled for deletion in an additional variable. This would be checked whenever we try to modify/arm it again. If it was currently scheduled for deletion, we must not modify/arm it.
And so, whenever we reach the situation where: base->running_timer == timer
We are sure, nobody is waiting in del_timer_sync().
We will clear this flag as soon as the timer is deleted, so that it can be started again after deleting it successfully.
Waiting for initial comments on it.
Signed-off-by: Viresh Kumar viresh.kumar@linaro.org --- include/linux/timer.h | 1 + kernel/timer.c | 58 +++++++++++++++++++++++++++++---------------------- 2 files changed, 34 insertions(+), 25 deletions(-)
diff --git a/include/linux/timer.h b/include/linux/timer.h index 8c5a197..ea36ce9 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -22,6 +22,7 @@ struct timer_list { unsigned long data;
int slack; + int sched_del;
#ifdef CONFIG_TIMER_STATS int start_pid; diff --git a/kernel/timer.c b/kernel/timer.c index 1cf8a91..536e7a3 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -620,6 +620,7 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags, timer->entry.next = NULL; timer->base = (void *)((unsigned long)base | flags); timer->slack = -1; + timer->sched_del = 0; #ifdef CONFIG_TIMER_STATS timer->start_site = NULL; timer->start_pid = -1; @@ -727,38 +728,35 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
base = lock_timer_base(timer, &flags);
+ if (timer->sched_del) { + /* Don't schedule it again, as it is getting deleted */ + ret = -EBUSY; + goto out_unlock; + } + ret = detach_if_pending(timer, base, false); if (!ret && pending_only) goto out_unlock;
debug_activate(timer, expires);
- /* - * Should we try to migrate timer? - * However we can't change timer's base while it is running, otherwise - * del_timer_sync() can't detect that the timer's handler yet has not - * finished. This also guarantees that the timer is serialized wrt - * itself. - */ - if (likely(base->running_timer != timer)) { #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) - if (!pinned && get_sysctl_timer_migration()) - cpu = get_nohz_timer_target(); - else - cpu = smp_processor_id(); -#else + if (!pinned && get_sysctl_timer_migration()) + cpu = get_nohz_timer_target(); + else cpu = smp_processor_id(); +#else + cpu = smp_processor_id(); #endif - new_base = per_cpu(tvec_bases, cpu); - - if (base != new_base) { - /* See the comment in lock_timer_base() */ - timer_set_base(timer, NULL); - spin_unlock(&base->lock); - base = new_base; - spin_lock(&base->lock); - timer_set_base(timer, base); - } + new_base = per_cpu(tvec_bases, cpu); + + if (base != new_base) { + /* See the comment in lock_timer_base() */ + timer_set_base(timer, NULL); + spin_unlock(&base->lock); + base = new_base; + spin_lock(&base->lock); + timer_set_base(timer, base); }
timer->expires = expires; @@ -1037,9 +1035,11 @@ EXPORT_SYMBOL(try_to_del_timer_sync); */ int del_timer_sync(struct timer_list *timer) { -#ifdef CONFIG_LOCKDEP + struct tvec_base *base; unsigned long flags;
+#ifdef CONFIG_LOCKDEP + /* * If lockdep gives a backtrace here, please reference * the synchronization rules above. @@ -1049,6 +1049,12 @@ int del_timer_sync(struct timer_list *timer) lock_map_release(&timer->lockdep_map); local_irq_restore(flags); #endif + + /* Timer is scheduled for deletion, don't let it re-arm itself */ + base = lock_timer_base(timer, &flags); + timer->sched_del = 1; + spin_unlock_irqrestore(&base->lock, flags); + /* * don't use it in hardirq context, because it * could lead to deadlock. @@ -1056,8 +1062,10 @@ int del_timer_sync(struct timer_list *timer) WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base)); for (;;) { int ret = try_to_del_timer_sync(timer); - if (ret >= 0) + if (ret >= 0) { + timer->sched_del = 0; return ret; + } cpu_relax(); } }