Merge branch 'x86-debug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'x86-debug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, reboot: Fix typo in nmi reboot path
  x86, NMI: Add to_cpumask() to silence compile warning
  x86, NMI: NMI selftest depends on the local apic
  x86: Add stack top margin for stack overflow checking
  x86, NMI: NMI-selftest should handle the UP case properly
  x86: Fix the 32-bit stackoverflow-debug build
  x86, NMI: Add knob to disable using NMI IPIs to stop cpus
  x86, NMI: Add NMI IPI selftest
  x86, reboot: Use NMI instead of REBOOT_VECTOR to stop cpus
  x86: Clean up the range of stack overflow checking
  x86: Panic on detection of stack overflow
  x86: Check stack overflow in detail
Committed by Linus Torvalds on 2012-01-11 19:13:04 -08:00
12 changed files with 337 additions and 10 deletions

Documentation/kernel-parameters.txt

@@ -1824,6 +1824,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
	nomfgpt		[X86-32] Disable Multi-Function General Purpose
			Timer usage (for AMD Geode machines).

	nonmi_ipi	[X86] Disable using NMI IPIs during panic/reboot to
			shutdown the other cpus. Instead use the REBOOT_VECTOR
			irq.

	nopat		[X86] Disable PAT (page attribute table extension of
			pagetables) support.
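Like any other entry in this file, the new flag is passed on the kernel
command line; a hypothetical bootloader stanza (the image path and root
device are placeholders):

    linux /boot/vmlinuz root=/dev/sda1 ro nonmi_ipi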

Documentation/sysctl/kernel.txt

@@ -49,6 +49,7 @@ show up in /proc/sys/kernel:
- panic
- panic_on_oops
- panic_on_unrecovered_nmi
- panic_on_stackoverflow
- pid_max
- powersave-nap               [ PPC only ]
- printk
@@ -393,6 +394,19 @@ Controls the kernel's behaviour when an oops or BUG is encountered.
==============================================================

panic_on_stackoverflow:

Controls the kernel's behavior when detecting the overflows of
kernel, IRQ and exception stacks except a user stack.
This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.

0: try to continue operation.

1: panic immediately.

==============================================================

pid_max:

PID allocation wrap value.  When the kernel's next PID value
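Once CONFIG_DEBUG_STACKOVERFLOW is enabled, the knob can be flipped at
runtime through procfs. A minimal userspace sketch, equivalent to
'sysctl -w kernel.panic_on_stackoverflow=1' (the proc path follows from
the sysctl table entry added in kernel/sysctl.c below):

    /* enable panic on kernel/IRQ/exception stack overflow */
    #include <stdio.h>

    int main(void)
    {
    	FILE *f = fopen("/proc/sys/kernel/panic_on_stackoverflow", "w");

    	if (!f) {
    		/* absent unless CONFIG_DEBUG_STACKOVERFLOW=y */
    		perror("fopen");
    		return 1;
    	}
    	fputs("1\n", f);
    	return fclose(f) ? 1 : 0;
    }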

arch/x86/Kconfig.debug

@@ -63,8 +63,11 @@ config DEBUG_STACKOVERFLOW
bool "Check for stack overflows" bool "Check for stack overflows"
depends on DEBUG_KERNEL depends on DEBUG_KERNEL
---help--- ---help---
This option will cause messages to be printed if free stack space Say Y here if you want to check the overflows of kernel, IRQ
drops below a certain limit. and exception stacks. This option will cause messages of the
stacks in detail when free stack space drops below a certain
limit.
If in doubt, say "N".
config X86_PTDUMP config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs" bool "Export kernel pagetable layout to userspace via debugfs"
@@ -284,4 +287,16 @@ config DEBUG_STRICT_USER_COPY_CHECKS
If unsure, or if you run an older (pre 4.4) gcc, say N. If unsure, or if you run an older (pre 4.4) gcc, say N.
config DEBUG_NMI_SELFTEST
bool "NMI Selftest"
depends on DEBUG_KERNEL && X86_LOCAL_APIC
---help---
Enabling this option turns on a quick NMI selftest to verify
that the NMI behaves correctly.
This might help diagnose strange hangs that rely on NMI to
function properly.
If unsure, say N.
endmenu endmenu
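A .config fragment that turns on both new debug features might look as
follows (a sketch; the first two symbols reflect the stated dependencies
and are normally set indirectly through the usual SMP/APIC options):

    CONFIG_DEBUG_KERNEL=y
    CONFIG_X86_LOCAL_APIC=y
    CONFIG_DEBUG_STACKOVERFLOW=y
    CONFIG_DEBUG_NMI_SELFTEST=y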

arch/x86/include/asm/smp.h

@@ -225,5 +225,11 @@ extern int hard_smp_processor_id(void);
#endif /* CONFIG_X86_LOCAL_APIC */

#ifdef CONFIG_DEBUG_NMI_SELFTEST
extern void nmi_selftest(void);
#else
#define nmi_selftest() do { } while (0)
#endif

#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_SMP_H */

arch/x86/kernel/Makefile

@@ -80,6 +80,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o
obj-$(CONFIG_AMD_NB)		+= amd_nb.o
obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST)		+= kvm.o
obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o

arch/x86/kernel/irq_32.c

@@ -28,6 +28,9 @@ DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);

#ifdef CONFIG_DEBUG_STACKOVERFLOW

int sysctl_panic_on_stackoverflow __read_mostly;

/* Debugging check for stack overflow: is there less than 1KB free? */
static int check_stack_overflow(void)
{
@@ -43,6 +46,8 @@ static void print_stack_overflow(void)
{
	printk(KERN_WARNING "low stack detected by irq handler\n");
	dump_stack();
	if (sysctl_panic_on_stackoverflow)
		panic("low stack detected by irq handler - check messages\n");
}

#else

arch/x86/kernel/irq_64.c

@@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);

int sysctl_panic_on_stackoverflow;

/*
 * Probabilistic stack overflow check:
 *
@@ -36,18 +38,39 @@ EXPORT_PER_CPU_SYMBOL(irq_regs);
static inline void stack_overflow_check(struct pt_regs *regs)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
#define STACK_TOP_MARGIN	128
	struct orig_ist *oist;
	u64 irq_stack_top, irq_stack_bottom;
	u64 estack_top, estack_bottom;
	u64 curbase = (u64)task_stack_page(current);

	if (user_mode_vm(regs))
		return;

-	WARN_ONCE(regs->sp >= curbase &&
-		  regs->sp <= curbase + THREAD_SIZE &&
-		  regs->sp <  curbase + sizeof(struct thread_info) +
-					sizeof(struct pt_regs) + 128,
-		  "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
-		  current->comm, curbase, regs->sp);
+	if (regs->sp >= curbase + sizeof(struct thread_info) +
+			sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
+	    regs->sp <= curbase + THREAD_SIZE)
+		return;
+
+	irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) +
+			STACK_TOP_MARGIN;
+	irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr);
+	if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
+		return;
+
+	oist = &__get_cpu_var(orig_ist);
+	estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
+	estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
+	if (regs->sp >= estack_top && regs->sp <= estack_bottom)
+		return;
+
+	WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
+		current->comm, curbase, regs->sp,
+		irq_stack_top, irq_stack_bottom,
+		estack_top, estack_bottom);
+
+	if (sysctl_panic_on_stackoverflow)
+		panic("low stack detected by irq handler - check messages\n");
#endif
}
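To follow the rewritten check: sp is tested against three legitimate
ranges (the process stack, the per-cpu IRQ stack, and the per-cpu
exception stacks), each with STACK_TOP_MARGIN (128 bytes) reserved at
the low end so the warning fires before an interrupt frame can actually
run past the stack base; only when sp falls outside all three does the
WARN_ONCE, and optionally the panic, trigger. The shared shape of the
three tests, as a standalone sketch (the helper name is hypothetical,
not in the patch):

    /* Stacks grow down: "top" is the lowest valid address and
     * "bottom" the highest; sp is fine inside [top + margin, bottom]. */
    static inline int stack_ok(unsigned long sp, unsigned long top,
    			   unsigned long bottom, unsigned long margin)
    {
    	return sp >= top + margin && sp <= bottom;
    }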

arch/x86/kernel/nmi_selftest.c

@@ -0,0 +1,180 @@
/*
 *  arch/x86/kernel/nmi-selftest.c
 *
 *  Testsuite for NMI: IPIs
 *
 *  Started by Don Zickus:
 *  (using lib/locking-selftest.c as a guide)
 *
 *  Copyright (C) 2011 Red Hat, Inc., Don Zickus <dzickus@redhat.com>
 */

#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/delay.h>

#include <asm/apic.h>
#include <asm/nmi.h>

#define SUCCESS		0
#define FAILURE		1
#define TIMEOUT		2

static int nmi_fail;

/* check to see if NMI IPIs work on this machine */
static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly;

static int testcase_total;
static int testcase_successes;
static int expected_testcase_failures;
static int unexpected_testcase_failures;
static int unexpected_testcase_unknowns;

static int nmi_unk_cb(unsigned int val, struct pt_regs *regs)
{
	unexpected_testcase_unknowns++;
	return NMI_HANDLED;
}

static void init_nmi_testsuite(void)
{
	/* trap all the unknown NMIs we may generate */
	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
}

static void cleanup_nmi_testsuite(void)
{
	unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk");
}

static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
{
	int cpu = raw_smp_processor_id();

	if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask)))
		return NMI_HANDLED;

	return NMI_DONE;
}

static void test_nmi_ipi(struct cpumask *mask)
{
	unsigned long timeout;

	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
				 NMI_FLAG_FIRST, "nmi_selftest")) {
		nmi_fail = FAILURE;
		return;
	}

	/* sync above data before sending NMI */
	wmb();

	apic->send_IPI_mask(mask, NMI_VECTOR);

	/* Don't wait longer than a second */
	timeout = USEC_PER_SEC;
	while (!cpumask_empty(mask) && timeout--)
		udelay(1);

	/* What happens if we timeout, do we still unregister?? */
	unregister_nmi_handler(NMI_LOCAL, "nmi_selftest");

	if (!timeout)
		nmi_fail = TIMEOUT;
	return;
}

static void remote_ipi(void)
{
	cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask);
	cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
	if (!cpumask_empty(to_cpumask(nmi_ipi_mask)))
		test_nmi_ipi(to_cpumask(nmi_ipi_mask));
}

static void local_ipi(void)
{
	cpumask_clear(to_cpumask(nmi_ipi_mask));
	cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
	test_nmi_ipi(to_cpumask(nmi_ipi_mask));
}

static void reset_nmi(void)
{
	nmi_fail = 0;
}

static void dotest(void (*testcase_fn)(void), int expected)
{
	testcase_fn();
	/*
	 * Filter out expected failures:
	 */
	if (nmi_fail != expected) {
		unexpected_testcase_failures++;

		if (nmi_fail == FAILURE)
			printk("FAILED |");
		else if (nmi_fail == TIMEOUT)
			printk("TIMEOUT|");
		else
			printk("ERROR |");
		dump_stack();
	} else {
		testcase_successes++;
		printk(" ok |");
	}
	testcase_total++;

	reset_nmi();
}

static inline void print_testname(const char *testname)
{
	printk("%12s:", testname);
}

void nmi_selftest(void)
{
	init_nmi_testsuite();

	/*
	 * Run the testsuite:
	 */
	printk("----------------\n");
	printk("| NMI testsuite:\n");
	printk("--------------------\n");

	print_testname("remote IPI");
	dotest(remote_ipi, SUCCESS);
	printk("\n");
	print_testname("local IPI");
	dotest(local_ipi, SUCCESS);
	printk("\n");

	cleanup_nmi_testsuite();

	if (unexpected_testcase_failures) {
		printk("--------------------\n");
		printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
			unexpected_testcase_failures, testcase_total);
		printk("-----------------------------------------------------------------\n");
	} else if (expected_testcase_failures && testcase_successes) {
		printk("--------------------\n");
		printk("%3d out of %3d testcases failed, as expected. |\n",
			expected_testcase_failures, testcase_total);
		printk("----------------------------------------------------\n");
	} else if (expected_testcase_failures && !testcase_successes) {
		printk("--------------------\n");
		printk("All %3d testcases failed, as expected. |\n",
			expected_testcase_failures);
		printk("----------------------------------------\n");
	} else {
		printk("--------------------\n");
		printk("Good, all %3d testcases passed! |\n",
			testcase_successes);
		printk("---------------------------------\n");
	}
}
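If both testcases pass, stitching the printks together suggests boot
output roughly like the following (reconstructed from the format strings
above, not captured from a real boot):

    ----------------
    | NMI testsuite:
    --------------------
      remote IPI: ok |
       local IPI: ok |
    --------------------
    Good, all   2 testcases passed! |
    ---------------------------------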

arch/x86/kernel/smp.c

@@ -29,6 +29,7 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/nmi.h>
/*
 *	Some notes on x86 processor bugs affecting SMP operation:
 *
@@ -148,6 +149,60 @@ void native_send_call_func_ipi(const struct cpumask *mask)
	free_cpumask_var(allbutself);
}

static atomic_t stopping_cpu = ATOMIC_INIT(-1);

static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
{
	/* We are registered on stopping cpu too, avoid spurious NMI */
	if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
		return NMI_HANDLED;

	stop_this_cpu(NULL);

	return NMI_HANDLED;
}

static void native_nmi_stop_other_cpus(int wait)
{
	unsigned long flags;
	unsigned long timeout;

	if (reboot_force)
		return;

	/*
	 * Use an own vector here because smp_call_function
	 * does lots of things not suitable in a panic situation.
	 */
	if (num_online_cpus() > 1) {
		/* did someone beat us here? */
		if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
			return;

		if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
					 NMI_FLAG_FIRST, "smp_stop"))
			/* Note: we ignore failures here */
			return;

		/* sync above data before sending NMI */
		wmb();

		apic->send_IPI_allbutself(NMI_VECTOR);

		/*
		 * Don't wait longer than a second if the caller
		 * didn't ask us to wait.
		 */
		timeout = USEC_PER_SEC;
		while (num_online_cpus() > 1 && (wait || timeout--))
			udelay(1);
	}

	local_irq_save(flags);
	disable_local_APIC();
	local_irq_restore(flags);
}

/*
 * this function calls the 'stop' function on all other CPUs in the system.
 */
@@ -160,7 +215,7 @@ asmlinkage void smp_reboot_interrupt(void)
	irq_exit();
}

-static void native_stop_other_cpus(int wait)
+static void native_irq_stop_other_cpus(int wait)
{
	unsigned long flags;
	unsigned long timeout;

@@ -194,6 +249,11 @@ static void native_stop_other_cpus(int wait)
	local_irq_restore(flags);
}

static void native_smp_disable_nmi_ipi(void)
{
	smp_ops.stop_other_cpus = native_irq_stop_other_cpus;
}

/*
 * Reschedule call back.
 */
@@ -225,12 +285,20 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
	irq_exit();
}

static int __init nonmi_ipi_setup(char *str)
{
	native_smp_disable_nmi_ipi();
	return 1;
}

__setup("nonmi_ipi", nonmi_ipi_setup);

struct smp_ops smp_ops = {
	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
	.smp_prepare_cpus	= native_smp_prepare_cpus,
	.smp_cpus_done		= native_smp_cpus_done,

-	.stop_other_cpus	= native_stop_other_cpus,
+	.stop_other_cpus	= native_nmi_stop_other_cpus,
	.smp_send_reschedule	= native_smp_send_reschedule,
	.cpu_up			= native_cpu_up,

arch/x86/kernel/smpboot.c

@@ -1143,6 +1143,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
{
	pr_debug("Boot done.\n");

	nmi_selftest();
	impress_friends();
#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();

include/linux/kernel.h

@@ -341,6 +341,7 @@ extern int panic_timeout;
extern int panic_on_oops;
extern int panic_on_unrecovered_nmi;
extern int panic_on_io_nmi;
extern int sysctl_panic_on_stackoverflow;
extern const char *print_tainted(void);
extern void add_taint(unsigned flag);
extern int test_taint(unsigned flag);

kernel/sysctl.c

@@ -803,6 +803,15 @@ static struct ctl_table kern_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_DEBUG_STACKOVERFLOW
	{
		.procname	= "panic_on_stackoverflow",
		.data		= &sysctl_panic_on_stackoverflow,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	{
		.procname	= "bootloader_type",
		.data		= &bootloader_type,