clocksource: Make watchdog reset lockless
KGDB needs to trylock watchdog_lock when trying to reset the clocksource
watchdog after the system has been stopped, to avoid a potential deadlock.
When the trylock fails, TSC usually becomes unstable.

We can be more clever by using an atomic counter and checking it in the
clocksource_watchdog callback. We restart the watchdog whenever the counter
is > 0 and only decrement the counter once we have run through a full
update cycle.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <johnstul@us.ibm.com>
Acked-by: Jason Wessel <jason.wessel@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1109121326280.2723@ionos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
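As a quick illustration of the scheme described above, here is a minimal,
hypothetical userspace sketch of the same lockless-reset pattern, written with
C11 atomics: requesting a reset only bumps a counter (no lock is taken, so it
is safe even when the machine was stopped with the lock held), the watchdog
pass re-initializes its reference samples and skips deviation checks while the
counter is non-zero, and the counter is decremented only after a complete pass
over all monitored clocks. All names here (request_reset, watchdog_cycle,
struct clk, TOY_THRESHOLD) are made up for the sketch and are not the kernel's
API; the actual change is in the diff below.

/*
 * Minimal userspace sketch of the lockless-reset pattern (illustrative only).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TOY_THRESHOLD 150       /* illustrative stand-in for WATCHDOG_THRESHOLD */

struct clk {
        long long last;         /* reference sample from the previous pass */
        bool initialized;       /* reference sample is valid */
};

static atomic_int reset_pending;

/* Safe from any context: no lock is taken, only a counter is bumped. */
static void request_reset(void)
{
        atomic_fetch_add(&reset_pending, 1);
}

/* One watchdog pass over all monitored clocks. */
static void watchdog_cycle(struct clk *clks, int n, long long now)
{
        /* Snapshot the counter; decrement only this snapshot later. */
        int pending = atomic_load(&reset_pending);

        for (int i = 0; i < n; i++) {
                long long snap = now;   /* stand-in for reading clock i */

                if (!clks[i].initialized || atomic_load(&reset_pending)) {
                        /* (Re)start: take a fresh reference, skip the check. */
                        clks[i].last = snap;
                        clks[i].initialized = true;
                        continue;
                }

                long long delta = snap - clks[i].last;
                clks[i].last = snap;

                /* A reset that arrived mid-pass also suppresses judgement. */
                if (atomic_load(&reset_pending))
                        continue;

                if (delta > TOY_THRESHOLD || delta < 0)
                        printf("clock %d looks unstable (delta=%lld)\n", i, delta);
        }

        /* Clear the request only after a full pass over every clock. */
        if (pending)
                atomic_fetch_sub(&reset_pending, 1);
}

int main(void)
{
        struct clk clks[2] = { { 0, false }, { 0, false } };

        watchdog_cycle(clks, 2, 100);   /* first pass: initialize references */
        request_reset();                /* e.g. the machine was stopped for a while */
        watchdog_cycle(clks, 2, 1000);  /* large gap, but no false "unstable" */
        watchdog_cycle(clks, 2, 1100);  /* normal checking resumes */
        return 0;
}

Reading the counter once at the top of the pass and decrementing only that
snapshot ensures that a request arriving mid-pass keeps the watchdog in
"restart" mode for at least one more full cycle, which mirrors what the patch
below does with watchdog_reset_pending.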
@@ -186,6 +186,7 @@ static struct timer_list watchdog_timer;
 static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
 static DEFINE_SPINLOCK(watchdog_lock);
 static int watchdog_running;
+static atomic_t watchdog_reset_pending;
 
 static int clocksource_watchdog_kthread(void *data);
 static void __clocksource_change_rating(struct clocksource *cs, int rating);
@@ -247,12 +248,14 @@ static void clocksource_watchdog(unsigned long data)
        struct clocksource *cs;
        cycle_t csnow, wdnow;
        int64_t wd_nsec, cs_nsec;
-       int next_cpu;
+       int next_cpu, reset_pending;
 
        spin_lock(&watchdog_lock);
        if (!watchdog_running)
                goto out;
 
+       reset_pending = atomic_read(&watchdog_reset_pending);
+
        list_for_each_entry(cs, &watchdog_list, wd_list) {
 
                /* Clocksource already marked unstable? */
@@ -268,7 +271,8 @@ static void clocksource_watchdog(unsigned long data)
                local_irq_enable();
 
                /* Clocksource initialized ? */
-               if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
+               if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
+                   atomic_read(&watchdog_reset_pending)) {
                        cs->flags |= CLOCK_SOURCE_WATCHDOG;
                        cs->wd_last = wdnow;
                        cs->cs_last = csnow;
@@ -283,8 +287,11 @@ static void clocksource_watchdog(unsigned long data)
                cs->cs_last = csnow;
                cs->wd_last = wdnow;
 
+               if (atomic_read(&watchdog_reset_pending))
+                       continue;
+
                /* Check the deviation from the watchdog clocksource. */
-               if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+               if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
                        clocksource_unstable(cs, cs_nsec - wd_nsec);
                        continue;
                }
@@ -302,6 +309,13 @@ static void clocksource_watchdog(unsigned long data)
                }
        }
 
+       /*
+        * We only clear the watchdog_reset_pending, when we did a
+        * full cycle through all clocksources.
+        */
+       if (reset_pending)
+               atomic_dec(&watchdog_reset_pending);
+
        /*
         * Cycle through CPUs to check if the CPUs stay synchronized
         * to each other.
@@ -344,23 +358,7 @@ static inline void clocksource_reset_watchdog(void)
 
 static void clocksource_resume_watchdog(void)
 {
-       unsigned long flags;
-
-       /*
-        * We use trylock here to avoid a potential dead lock when
-        * kgdb calls this code after the kernel has been stopped with
-        * watchdog_lock held. When watchdog_lock is held we just
-        * return and accept, that the watchdog might trigger and mark
-        * the monitored clock source (usually TSC) unstable.
-        *
-        * This does not affect the other caller clocksource_resume()
-        * because at this point the kernel is UP, interrupts are
-        * disabled and nothing can hold watchdog_lock.
-        */
-       if (!spin_trylock_irqsave(&watchdog_lock, flags))
-               return;
-       clocksource_reset_watchdog();
-       spin_unlock_irqrestore(&watchdog_lock, flags);
+       atomic_inc(&watchdog_reset_pending);
 }
 
 static void clocksource_enqueue_watchdog(struct clocksource *cs)