clocksource: Make watchdog reset lockless
KGDB needs to trylock watchdog_lock when trying to reset the clocksource
watchdog after the system has been stopped, to avoid a potential deadlock.
When the trylock fails, TSC usually becomes unstable.

We can be more clever by using an atomic counter and checking it in the
clocksource_watchdog callback. We restart the watchdog whenever the counter
is > 0 and only decrement the counter once we have run through a full
update cycle.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <johnstul@us.ibm.com>
Acked-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1109121326280.2723@ionos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
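As a quick illustration of the scheme described above, here is a minimal,
hypothetical userspace sketch of the same lockless-reset pattern, written with
C11 atomics: requesting a reset only bumps a counter (no lock is taken, so it
is safe even when the machine was stopped with the lock held), the watchdog
pass re-initializes its reference samples and skips deviation checks while the
counter is non-zero, and the counter is decremented only after a complete pass
over all monitored clocks. All names here (request_reset, watchdog_cycle,
struct clk, TOY_THRESHOLD) are made up for the sketch and are not the kernel's
API; the actual change is in the diff below.

/*
 * Minimal userspace sketch of the lockless-reset pattern (illustrative only).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TOY_THRESHOLD 150       /* illustrative stand-in for WATCHDOG_THRESHOLD */

struct clk {
        long long last;         /* reference sample from the previous pass */
        bool initialized;       /* reference sample is valid */
};

static atomic_int reset_pending;

/* Safe from any context: no lock is taken, only a counter is bumped. */
static void request_reset(void)
{
        atomic_fetch_add(&reset_pending, 1);
}

/* One watchdog pass over all monitored clocks. */
static void watchdog_cycle(struct clk *clks, int n, long long now)
{
        /* Snapshot the counter; decrement only this snapshot later. */
        int pending = atomic_load(&reset_pending);

        for (int i = 0; i < n; i++) {
                long long snap = now;   /* stand-in for reading clock i */

                if (!clks[i].initialized || atomic_load(&reset_pending)) {
                        /* (Re)start: take a fresh reference, skip the check. */
                        clks[i].last = snap;
                        clks[i].initialized = true;
                        continue;
                }

                long long delta = snap - clks[i].last;
                clks[i].last = snap;

                /* A reset that arrived mid-pass also suppresses judgement. */
                if (atomic_load(&reset_pending))
                        continue;

                if (delta > TOY_THRESHOLD || delta < 0)
                        printf("clock %d looks unstable (delta=%lld)\n", i, delta);
        }

        /* Clear the request only after a full pass over every clock. */
        if (pending)
                atomic_fetch_sub(&reset_pending, 1);
}

int main(void)
{
        struct clk clks[2] = { { 0, false }, { 0, false } };

        watchdog_cycle(clks, 2, 100);   /* first pass: initialize references */
        request_reset();                /* e.g. the machine was stopped for a while */
        watchdog_cycle(clks, 2, 1000);  /* large gap, but no false "unstable" */
        watchdog_cycle(clks, 2, 1100);  /* normal checking resumes */
        return 0;
}

Reading the counter once at the top of the pass and decrementing only that
snapshot ensures that a request arriving mid-pass keeps the watchdog in
"restart" mode for at least one more full cycle, which mirrors what the patch
below does with watchdog_reset_pending.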
@@ -186,6 +186,7 @@ static struct timer_list watchdog_timer;
 static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
 static DEFINE_SPINLOCK(watchdog_lock);
 static int watchdog_running;
+static atomic_t watchdog_reset_pending;
 
 static int clocksource_watchdog_kthread(void *data);
 static void __clocksource_change_rating(struct clocksource *cs, int rating);
@@ -247,12 +248,14 @@ static void clocksource_watchdog(unsigned long data)
        struct clocksource *cs;
        cycle_t csnow, wdnow;
        int64_t wd_nsec, cs_nsec;
-       int next_cpu;
+       int next_cpu, reset_pending;
 
        spin_lock(&watchdog_lock);
        if (!watchdog_running)
                goto out;
 
+       reset_pending = atomic_read(&watchdog_reset_pending);
+
        list_for_each_entry(cs, &watchdog_list, wd_list) {
 
                /* Clocksource already marked unstable? */
@@ -268,7 +271,8 @@ static void clocksource_watchdog(unsigned long data)
                local_irq_enable();
 
                /* Clocksource initialized ? */
-               if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
+               if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
+                   atomic_read(&watchdog_reset_pending)) {
                        cs->flags |= CLOCK_SOURCE_WATCHDOG;
                        cs->wd_last = wdnow;
                        cs->cs_last = csnow;
@@ -283,8 +287,11 @@ static void clocksource_watchdog(unsigned long data)
                cs->cs_last = csnow;
                cs->wd_last = wdnow;
 
+               if (atomic_read(&watchdog_reset_pending))
+                       continue;
+
                /* Check the deviation from the watchdog clocksource. */
-               if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+               if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
                        clocksource_unstable(cs, cs_nsec - wd_nsec);
                        continue;
                }
@@ -302,6 +309,13 @@ static void clocksource_watchdog(unsigned long data)
                }
        }
 
+       /*
+        * We only clear the watchdog_reset_pending, when we did a
+        * full cycle through all clocksources.
+        */
+       if (reset_pending)
+               atomic_dec(&watchdog_reset_pending);
+
        /*
         * Cycle through CPUs to check if the CPUs stay synchronized
         * to each other.
@@ -344,23 +358,7 @@ static inline void clocksource_reset_watchdog(void)
 
 static void clocksource_resume_watchdog(void)
 {
-       unsigned long flags;
-
-       /*
-        * We use trylock here to avoid a potential dead lock when
-        * kgdb calls this code after the kernel has been stopped with
-        * watchdog_lock held. When watchdog_lock is held we just
-        * return and accept, that the watchdog might trigger and mark
-        * the monitored clock source (usually TSC) unstable.
-        *
-        * This does not affect the other caller clocksource_resume()
-        * because at this point the kernel is UP, interrupts are
-        * disabled and nothing can hold watchdog_lock.
-        */
-       if (!spin_trylock_irqsave(&watchdog_lock, flags))
-               return;
-       clocksource_reset_watchdog();
-       spin_unlock_irqrestore(&watchdog_lock, flags);
+       atomic_inc(&watchdog_reset_pending);
 }
 
 static void clocksource_enqueue_watchdog(struct clocksource *cs)