Merge tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull x86/mce merge window patches from Tony Luck:
  "Including two that make error_context() checks less sucky"

* tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  x86/mce: Add instruction recovery signatures to mce-severity table
  x86/mce: Fix check for processor context when machine check was taken.
  MCE: Fix vm86 handling for 32bit mce handler
  x86/mce Add validation check before GHES error is recorded
  x86/mce: Avoid reading every machine check bank register twice.
This commit is contained in:
@@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
|
||||
struct mce m;
|
||||
|
||||
/* Only corrected MC is reported */
|
||||
if (!corrected)
|
||||
if (!corrected || !(mem_err->validation_bits &
|
||||
CPER_MEM_VALID_PHYSICAL_ADDRESS))
|
||||
return;
|
||||
|
||||
mce_setup(&m);
|
||||
|
@@ -126,6 +126,16 @@ static struct severity {
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
|
||||
USER
|
||||
),
|
||||
MCESEV(
|
||||
KEEP, "HT thread notices Action required: instruction fetch error",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
|
||||
MCGMASK(MCG_STATUS_EIPV, 0)
|
||||
),
|
||||
MCESEV(
|
||||
AR, "Action required: instruction fetch error",
|
||||
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
|
||||
USER
|
||||
),
|
||||
#endif
|
||||
MCESEV(
|
||||
PANIC, "Action required: unknown MCACOD",
|
||||
@@ -165,15 +175,19 @@ static struct severity {
|
||||
};
|
||||
|
||||
/*
|
||||
* If the EIPV bit is set, it means the saved IP is the
|
||||
* instruction which caused the MCE.
|
||||
* If mcgstatus indicated that ip/cs on the stack were
|
||||
* no good, then "m->cs" will be zero and we will have
|
||||
* to assume the worst case (IN_KERNEL) as we actually
|
||||
* have no idea what we were executing when the machine
|
||||
* check hit.
|
||||
* If we do have a good "m->cs" (or a faked one in the
|
||||
* case we were executing in VM86 mode) we can use it to
|
||||
* distinguish an exception taken in user mode from one
|
||||
* taken in the kernel.
|
||||
*/
|
||||
static int error_context(struct mce *m)
|
||||
{
|
||||
if (m->mcgstatus & MCG_STATUS_EIPV)
|
||||
return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
|
||||
/* Unknown, assume kernel */
|
||||
return IN_KERNEL;
|
||||
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
|
||||
}
|
||||
|
||||
int mce_severity(struct mce *m, int tolerant, char **msg)
|
||||
|
@@ -437,6 +437,14 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
|
||||
if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
|
||||
m->ip = regs->ip;
|
||||
m->cs = regs->cs;
|
||||
|
||||
/*
|
||||
* When in VM86 mode make the cs look like ring 3
|
||||
* always. This is a lie, but it's better than passing
|
||||
* the additional vm86 bit around everywhere.
|
||||
*/
|
||||
if (v8086_mode(regs))
|
||||
m->cs |= 3;
|
||||
}
|
||||
/* Use accurate RIP reporting if available. */
|
||||
if (rip_msr)
|
||||
@@ -641,16 +649,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
|
||||
* Do a quick check if any of the events requires a panic.
|
||||
* This decides if we keep the events around or clear them.
|
||||
*/
|
||||
static int mce_no_way_out(struct mce *m, char **msg)
|
||||
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
|
||||
{
|
||||
int i;
|
||||
int i, ret = 0;
|
||||
|
||||
for (i = 0; i < banks; i++) {
|
||||
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
|
||||
if (m->status & MCI_STATUS_VAL)
|
||||
__set_bit(i, validp);
|
||||
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
|
||||
return 1;
|
||||
ret = 1;
|
||||
}
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1013,6 +1023,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||
*/
|
||||
int kill_it = 0;
|
||||
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
|
||||
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
|
||||
char *msg = "Unknown";
|
||||
|
||||
atomic_inc(&mce_entry);
|
||||
@@ -1027,7 +1038,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||
final = &__get_cpu_var(mces_seen);
|
||||
*final = m;
|
||||
|
||||
no_way_out = mce_no_way_out(&m, &msg);
|
||||
memset(valid_banks, 0, sizeof(valid_banks));
|
||||
no_way_out = mce_no_way_out(&m, &msg, valid_banks);
|
||||
|
||||
barrier();
|
||||
|
||||
@@ -1047,6 +1059,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||
order = mce_start(&no_way_out);
|
||||
for (i = 0; i < banks; i++) {
|
||||
__clear_bit(i, toclear);
|
||||
if (!test_bit(i, valid_banks))
|
||||
continue;
|
||||
if (!mce_banks[i].ctl)
|
||||
continue;
|
||||
|
||||
|
Reference in New Issue
Block a user