Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-tip
* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-tip: x86-64: Rework vsyscall emulation and add vsyscall= parameter x86-64: Wire up getcpu syscall x86: Remove unnecessary compile flag tweaks for vsyscall code x86-64: Add vsyscall:emulate_vsyscall trace event x86-64: Add user_64bit_mode paravirt op x86-64, xen: Enable the vvar mapping x86-64: Work around gold bug 13023 x86-64: Move the "user" vsyscall segment out of the data segment. x86-64: Pad vDSO to a page boundary
This commit is contained in:
@@ -2680,6 +2680,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||||||
vmpoff= [KNL,S390] Perform z/VM CP command after power off.
|
vmpoff= [KNL,S390] Perform z/VM CP command after power off.
|
||||||
Format: <command>
|
Format: <command>
|
||||||
|
|
||||||
|
vsyscall= [X86-64]
|
||||||
|
Controls the behavior of vsyscalls (i.e. calls to
|
||||||
|
fixed addresses of 0xffffffffff600x00 from legacy
|
||||||
|
code). Most statically-linked binaries and older
|
||||||
|
versions of glibc use these calls. Because these
|
||||||
|
functions are at fixed addresses, they make nice
|
||||||
|
targets for exploits that can control RIP.
|
||||||
|
|
||||||
|
emulate [default] Vsyscalls turn into traps and are
|
||||||
|
emulated reasonably safely.
|
||||||
|
|
||||||
|
native Vsyscalls are native syscall instructions.
|
||||||
|
This is a little bit faster than trapping
|
||||||
|
and makes a few dynamic recompilers work
|
||||||
|
better than they would in emulation mode.
|
||||||
|
It also makes exploits much easier to write.
|
||||||
|
|
||||||
|
none Vsyscalls don't work at all. This makes
|
||||||
|
them quite hard to use for exploits but
|
||||||
|
might break your system.
|
||||||
|
|
||||||
vt.cur_default= [VT] Default cursor shape.
|
vt.cur_default= [VT] Default cursor shape.
|
||||||
Format: 0xCCBBAA, where AA, BB, and CC are the same as
|
Format: 0xCCBBAA, where AA, BB, and CC are the same as
|
||||||
the parameters of the <Esc>[?A;B;Cc escape sequence;
|
the parameters of the <Esc>[?A;B;Cc escape sequence;
|
||||||
|
@@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
|
|||||||
|
|
||||||
desc->base2 = (info->base_addr & 0xff000000) >> 24;
|
desc->base2 = (info->base_addr & 0xff000000) >> 24;
|
||||||
/*
|
/*
|
||||||
* Don't allow setting of the lm bit. It is useless anyway
|
* Don't allow setting of the lm bit. It would confuse
|
||||||
* because 64bit system calls require __USER_CS:
|
* user_64bit_mode and would get overridden by sysret anyway.
|
||||||
*/
|
*/
|
||||||
desc->l = 0;
|
desc->l = 0;
|
||||||
}
|
}
|
||||||
|
@@ -17,7 +17,6 @@
|
|||||||
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
|
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
|
||||||
* Vectors 32 ... 127 : device interrupts
|
* Vectors 32 ... 127 : device interrupts
|
||||||
* Vector 128 : legacy int80 syscall interface
|
* Vector 128 : legacy int80 syscall interface
|
||||||
* Vector 204 : legacy x86_64 vsyscall emulation
|
|
||||||
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
|
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
|
||||||
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
|
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
|
||||||
*
|
*
|
||||||
@@ -51,9 +50,6 @@
|
|||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
# define SYSCALL_VECTOR 0x80
|
# define SYSCALL_VECTOR 0x80
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
# define VSYSCALL_EMU_VECTOR 0xcc
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Vectors 0x30-0x3f are used for ISA interrupts.
|
* Vectors 0x30-0x3f are used for ISA interrupts.
|
||||||
|
@@ -41,6 +41,7 @@
|
|||||||
|
|
||||||
#include <asm/desc_defs.h>
|
#include <asm/desc_defs.h>
|
||||||
#include <asm/kmap_types.h>
|
#include <asm/kmap_types.h>
|
||||||
|
#include <asm/pgtable_types.h>
|
||||||
|
|
||||||
struct page;
|
struct page;
|
||||||
struct thread_struct;
|
struct thread_struct;
|
||||||
@@ -63,6 +64,11 @@ struct paravirt_callee_save {
|
|||||||
struct pv_info {
|
struct pv_info {
|
||||||
unsigned int kernel_rpl;
|
unsigned int kernel_rpl;
|
||||||
int shared_kernel_pmd;
|
int shared_kernel_pmd;
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
u16 extra_user_64bit_cs; /* __USER_CS if none */
|
||||||
|
#endif
|
||||||
|
|
||||||
int paravirt_enabled;
|
int paravirt_enabled;
|
||||||
const char *name;
|
const char *name;
|
||||||
};
|
};
|
||||||
|
@@ -131,6 +131,9 @@ struct pt_regs {
|
|||||||
#ifdef __KERNEL__
|
#ifdef __KERNEL__
|
||||||
|
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
|
#ifdef CONFIG_PARAVIRT
|
||||||
|
#include <asm/paravirt_types.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
struct cpuinfo_x86;
|
struct cpuinfo_x86;
|
||||||
struct task_struct;
|
struct task_struct;
|
||||||
@@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
static inline bool user_64bit_mode(struct pt_regs *regs)
|
||||||
|
{
|
||||||
|
#ifndef CONFIG_PARAVIRT
|
||||||
|
/*
|
||||||
|
* On non-paravirt systems, this is the only long mode CPL 3
|
||||||
|
* selector. We do not allow long mode selectors in the LDT.
|
||||||
|
*/
|
||||||
|
return regs->cs == __USER_CS;
|
||||||
|
#else
|
||||||
|
/* Headers are too twisted for this to go in paravirt.h. */
|
||||||
|
return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
|
* X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
|
||||||
* when it traps. The previous stack will be directly underneath the saved
|
* when it traps. The previous stack will be directly underneath the saved
|
||||||
|
@@ -40,7 +40,6 @@ asmlinkage void alignment_check(void);
|
|||||||
asmlinkage void machine_check(void);
|
asmlinkage void machine_check(void);
|
||||||
#endif /* CONFIG_X86_MCE */
|
#endif /* CONFIG_X86_MCE */
|
||||||
asmlinkage void simd_coprocessor_error(void);
|
asmlinkage void simd_coprocessor_error(void);
|
||||||
asmlinkage void emulate_vsyscall(void);
|
|
||||||
|
|
||||||
dotraplinkage void do_divide_error(struct pt_regs *, long);
|
dotraplinkage void do_divide_error(struct pt_regs *, long);
|
||||||
dotraplinkage void do_debug(struct pt_regs *, long);
|
dotraplinkage void do_debug(struct pt_regs *, long);
|
||||||
@@ -67,7 +66,6 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long);
|
|||||||
dotraplinkage void do_machine_check(struct pt_regs *, long);
|
dotraplinkage void do_machine_check(struct pt_regs *, long);
|
||||||
#endif
|
#endif
|
||||||
dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
|
dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
|
||||||
dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long);
|
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
dotraplinkage void do_iret_error(struct pt_regs *, long);
|
dotraplinkage void do_iret_error(struct pt_regs *, long);
|
||||||
#endif
|
#endif
|
||||||
|
@@ -681,6 +681,8 @@ __SYSCALL(__NR_syncfs, sys_syncfs)
|
|||||||
__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
|
__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
|
||||||
#define __NR_setns 308
|
#define __NR_setns 308
|
||||||
__SYSCALL(__NR_setns, sys_setns)
|
__SYSCALL(__NR_setns, sys_setns)
|
||||||
|
#define __NR_getcpu 309
|
||||||
|
__SYSCALL(__NR_getcpu, sys_getcpu)
|
||||||
|
|
||||||
#ifndef __NO_STUBS
|
#ifndef __NO_STUBS
|
||||||
#define __ARCH_WANT_OLD_READDIR
|
#define __ARCH_WANT_OLD_READDIR
|
||||||
|
@@ -27,6 +27,12 @@ extern struct timezone sys_tz;
|
|||||||
|
|
||||||
extern void map_vsyscall(void);
|
extern void map_vsyscall(void);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called on instruction fetch fault in vsyscall page.
|
||||||
|
* Returns true if handled.
|
||||||
|
*/
|
||||||
|
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
|
||||||
|
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
|
|
||||||
#endif /* _ASM_X86_VSYSCALL_H */
|
#endif /* _ASM_X86_VSYSCALL_H */
|
||||||
|
@@ -17,19 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
|
|||||||
CFLAGS_REMOVE_early_printk.o = -pg
|
CFLAGS_REMOVE_early_printk.o = -pg
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#
|
|
||||||
# vsyscalls (which work on the user stack) should have
|
|
||||||
# no stack-protector checks:
|
|
||||||
#
|
|
||||||
nostackp := $(call cc-option, -fno-stack-protector)
|
|
||||||
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
|
|
||||||
CFLAGS_hpet.o := $(nostackp)
|
|
||||||
CFLAGS_paravirt.o := $(nostackp)
|
|
||||||
GCOV_PROFILE_vsyscall_64.o := n
|
|
||||||
GCOV_PROFILE_hpet.o := n
|
|
||||||
GCOV_PROFILE_tsc.o := n
|
|
||||||
GCOV_PROFILE_paravirt.o := n
|
|
||||||
|
|
||||||
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
|
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
|
||||||
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
|
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
|
||||||
obj-y += time.o ioport.o ldt.o dumpstack.o
|
obj-y += time.o ioport.o ldt.o dumpstack.o
|
||||||
|
@@ -1111,7 +1111,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
|
|||||||
zeroentry coprocessor_error do_coprocessor_error
|
zeroentry coprocessor_error do_coprocessor_error
|
||||||
errorentry alignment_check do_alignment_check
|
errorentry alignment_check do_alignment_check
|
||||||
zeroentry simd_coprocessor_error do_simd_coprocessor_error
|
zeroentry simd_coprocessor_error do_simd_coprocessor_error
|
||||||
zeroentry emulate_vsyscall do_emulate_vsyscall
|
|
||||||
|
|
||||||
|
|
||||||
/* Reload gs selector with exception handling */
|
/* Reload gs selector with exception handling */
|
||||||
|
@@ -307,6 +307,10 @@ struct pv_info pv_info = {
|
|||||||
.paravirt_enabled = 0,
|
.paravirt_enabled = 0,
|
||||||
.kernel_rpl = 0,
|
.kernel_rpl = 0,
|
||||||
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
|
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
.extra_user_64bit_cs = __USER_CS,
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
struct pv_init_ops pv_init_ops = {
|
struct pv_init_ops pv_init_ops = {
|
||||||
|
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
|
|||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
case 0x40 ... 0x4f:
|
case 0x40 ... 0x4f:
|
||||||
if (regs->cs != __USER_CS)
|
if (!user_64bit_mode(regs))
|
||||||
/* 32-bit mode: register increment */
|
/* 32-bit mode: register increment */
|
||||||
return 0;
|
return 0;
|
||||||
/* 64-bit mode: REX prefix */
|
/* 64-bit mode: REX prefix */
|
||||||
|
@@ -872,12 +872,6 @@ void __init trap_init(void)
|
|||||||
set_bit(SYSCALL_VECTOR, used_vectors);
|
set_bit(SYSCALL_VECTOR, used_vectors);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
|
|
||||||
set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
|
|
||||||
set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Should be a barrier for any external CPU state:
|
* Should be a barrier for any external CPU state:
|
||||||
*/
|
*/
|
||||||
|
@@ -71,7 +71,6 @@ PHDRS {
|
|||||||
text PT_LOAD FLAGS(5); /* R_E */
|
text PT_LOAD FLAGS(5); /* R_E */
|
||||||
data PT_LOAD FLAGS(6); /* RW_ */
|
data PT_LOAD FLAGS(6); /* RW_ */
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
user PT_LOAD FLAGS(5); /* R_E */
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
percpu PT_LOAD FLAGS(6); /* RW_ */
|
percpu PT_LOAD FLAGS(6); /* RW_ */
|
||||||
#endif
|
#endif
|
||||||
@@ -154,44 +153,16 @@ SECTIONS
|
|||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
|
|
||||||
#define VSYSCALL_ADDR (-10*1024*1024)
|
. = ALIGN(PAGE_SIZE);
|
||||||
|
|
||||||
#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
|
|
||||||
#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
|
|
||||||
|
|
||||||
#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
|
|
||||||
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
|
|
||||||
|
|
||||||
. = ALIGN(4096);
|
|
||||||
__vsyscall_0 = .;
|
|
||||||
|
|
||||||
. = VSYSCALL_ADDR;
|
|
||||||
.vsyscall : AT(VLOAD(.vsyscall)) {
|
|
||||||
*(.vsyscall_0)
|
|
||||||
|
|
||||||
. = 1024;
|
|
||||||
*(.vsyscall_1)
|
|
||||||
|
|
||||||
. = 2048;
|
|
||||||
*(.vsyscall_2)
|
|
||||||
|
|
||||||
. = 4096; /* Pad the whole page. */
|
|
||||||
} :user =0xcc
|
|
||||||
. = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
|
|
||||||
|
|
||||||
#undef VSYSCALL_ADDR
|
|
||||||
#undef VLOAD_OFFSET
|
|
||||||
#undef VLOAD
|
|
||||||
#undef VVIRT_OFFSET
|
|
||||||
#undef VVIRT
|
|
||||||
|
|
||||||
__vvar_page = .;
|
__vvar_page = .;
|
||||||
|
|
||||||
.vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
|
.vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
|
||||||
|
/* work around gold bug 13023 */
|
||||||
|
__vvar_beginning_hack = .;
|
||||||
|
|
||||||
/* Place all vvars at the offsets in asm/vvar.h. */
|
/* Place all vvars at the offsets in asm/vvar.h. */
|
||||||
#define EMIT_VVAR(name, offset) \
|
#define EMIT_VVAR(name, offset) \
|
||||||
. = offset; \
|
. = __vvar_beginning_hack + offset; \
|
||||||
*(.vvar_ ## name)
|
*(.vvar_ ## name)
|
||||||
#define __VVAR_KERNEL_LDS
|
#define __VVAR_KERNEL_LDS
|
||||||
#include <asm/vvar.h>
|
#include <asm/vvar.h>
|
||||||
|
@@ -18,9 +18,6 @@
|
|||||||
* use the vDSO.
|
* use the vDSO.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Disable profiling for userspace code: */
|
|
||||||
#define DISABLE_BRANCH_PROFILING
|
|
||||||
|
|
||||||
#include <linux/time.h>
|
#include <linux/time.h>
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
@@ -50,12 +47,36 @@
|
|||||||
#include <asm/vgtod.h>
|
#include <asm/vgtod.h>
|
||||||
#include <asm/traps.h>
|
#include <asm/traps.h>
|
||||||
|
|
||||||
|
#define CREATE_TRACE_POINTS
|
||||||
|
#include "vsyscall_trace.h"
|
||||||
|
|
||||||
DEFINE_VVAR(int, vgetcpu_mode);
|
DEFINE_VVAR(int, vgetcpu_mode);
|
||||||
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
|
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
|
||||||
{
|
{
|
||||||
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
|
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
|
||||||
|
|
||||||
|
static int __init vsyscall_setup(char *str)
|
||||||
|
{
|
||||||
|
if (str) {
|
||||||
|
if (!strcmp("emulate", str))
|
||||||
|
vsyscall_mode = EMULATE;
|
||||||
|
else if (!strcmp("native", str))
|
||||||
|
vsyscall_mode = NATIVE;
|
||||||
|
else if (!strcmp("none", str))
|
||||||
|
vsyscall_mode = NONE;
|
||||||
|
else
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
early_param("vsyscall", vsyscall_setup);
|
||||||
|
|
||||||
void update_vsyscall_tz(void)
|
void update_vsyscall_tz(void)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
|
|||||||
|
|
||||||
printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
|
printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
|
||||||
level, tsk->comm, task_pid_nr(tsk),
|
level, tsk->comm, task_pid_nr(tsk),
|
||||||
message, regs->ip - 2, regs->cs,
|
message, regs->ip, regs->cs,
|
||||||
regs->sp, regs->ax, regs->si, regs->di);
|
regs->sp, regs->ax, regs->si, regs->di);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -118,46 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr)
|
|||||||
return nr;
|
return nr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
|
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
|
||||||
{
|
{
|
||||||
struct task_struct *tsk;
|
struct task_struct *tsk;
|
||||||
unsigned long caller;
|
unsigned long caller;
|
||||||
int vsyscall_nr;
|
int vsyscall_nr;
|
||||||
long ret;
|
long ret;
|
||||||
|
|
||||||
local_irq_enable();
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Real 64-bit user mode code has cs == __USER_CS. Anything else
|
* No point in checking CS -- the only way to get here is a user mode
|
||||||
* is bogus.
|
* trap to a high address, which means that we're in 64-bit user code.
|
||||||
*/
|
*/
|
||||||
if (regs->cs != __USER_CS) {
|
|
||||||
/*
|
|
||||||
* If we trapped from kernel mode, we might as well OOPS now
|
|
||||||
* instead of returning to some random address and OOPSing
|
|
||||||
* then.
|
|
||||||
*/
|
|
||||||
BUG_ON(!user_mode(regs));
|
|
||||||
|
|
||||||
/* Compat mode and non-compat 32-bit CS should both segfault. */
|
WARN_ON_ONCE(address != regs->ip);
|
||||||
warn_bad_vsyscall(KERN_WARNING, regs,
|
|
||||||
"illegal int 0xcc from 32-bit mode");
|
if (vsyscall_mode == NONE) {
|
||||||
goto sigsegv;
|
warn_bad_vsyscall(KERN_INFO, regs,
|
||||||
|
"vsyscall attempted with vsyscall=none");
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
vsyscall_nr = addr_to_vsyscall_nr(address);
|
||||||
* x86-ism here: regs->ip points to the instruction after the int 0xcc,
|
|
||||||
* and int 0xcc is two bytes long.
|
trace_emulate_vsyscall(vsyscall_nr);
|
||||||
*/
|
|
||||||
vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
|
|
||||||
if (vsyscall_nr < 0) {
|
if (vsyscall_nr < 0) {
|
||||||
warn_bad_vsyscall(KERN_WARNING, regs,
|
warn_bad_vsyscall(KERN_WARNING, regs,
|
||||||
"illegal int 0xcc (exploit attempt?)");
|
"misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
|
||||||
goto sigsegv;
|
goto sigsegv;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
|
if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
|
||||||
warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)");
|
warn_bad_vsyscall(KERN_WARNING, regs,
|
||||||
|
"vsyscall with bad stack (exploit attempt?)");
|
||||||
goto sigsegv;
|
goto sigsegv;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -202,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
|
|||||||
regs->ip = caller;
|
regs->ip = caller;
|
||||||
regs->sp += 8;
|
regs->sp += 8;
|
||||||
|
|
||||||
local_irq_disable();
|
return true;
|
||||||
return;
|
|
||||||
|
|
||||||
sigsegv:
|
sigsegv:
|
||||||
regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */
|
|
||||||
force_sig(SIGSEGV, current);
|
force_sig(SIGSEGV, current);
|
||||||
local_irq_disable();
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -256,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
|
|||||||
|
|
||||||
void __init map_vsyscall(void)
|
void __init map_vsyscall(void)
|
||||||
{
|
{
|
||||||
extern char __vsyscall_0;
|
extern char __vsyscall_page;
|
||||||
unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
|
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
|
||||||
extern char __vvar_page;
|
extern char __vvar_page;
|
||||||
unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
|
unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
|
||||||
|
|
||||||
/* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
|
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
|
||||||
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
|
vsyscall_mode == NATIVE
|
||||||
|
? PAGE_KERNEL_VSYSCALL
|
||||||
|
: PAGE_KERNEL_VVAR);
|
||||||
|
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
|
||||||
|
(unsigned long)VSYSCALL_START);
|
||||||
|
|
||||||
__set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
|
__set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
|
||||||
BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS);
|
BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
|
||||||
|
(unsigned long)VVAR_ADDRESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __init vsyscall_init(void)
|
static int __init vsyscall_init(void)
|
||||||
|
@@ -7,21 +7,31 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/linkage.h>
|
#include <linux/linkage.h>
|
||||||
|
|
||||||
#include <asm/irq_vectors.h>
|
#include <asm/irq_vectors.h>
|
||||||
|
#include <asm/page_types.h>
|
||||||
|
#include <asm/unistd_64.h>
|
||||||
|
|
||||||
/* The unused parts of the page are filled with 0xcc by the linker script. */
|
__PAGE_ALIGNED_DATA
|
||||||
|
.globl __vsyscall_page
|
||||||
|
.balign PAGE_SIZE, 0xcc
|
||||||
|
.type __vsyscall_page, @object
|
||||||
|
__vsyscall_page:
|
||||||
|
|
||||||
.section .vsyscall_0, "a"
|
mov $__NR_gettimeofday, %rax
|
||||||
ENTRY(vsyscall_0)
|
syscall
|
||||||
int $VSYSCALL_EMU_VECTOR
|
ret
|
||||||
END(vsyscall_0)
|
|
||||||
|
|
||||||
.section .vsyscall_1, "a"
|
.balign 1024, 0xcc
|
||||||
ENTRY(vsyscall_1)
|
mov $__NR_time, %rax
|
||||||
int $VSYSCALL_EMU_VECTOR
|
syscall
|
||||||
END(vsyscall_1)
|
ret
|
||||||
|
|
||||||
.section .vsyscall_2, "a"
|
.balign 1024, 0xcc
|
||||||
ENTRY(vsyscall_2)
|
mov $__NR_getcpu, %rax
|
||||||
int $VSYSCALL_EMU_VECTOR
|
syscall
|
||||||
END(vsyscall_2)
|
ret
|
||||||
|
|
||||||
|
.balign 4096, 0xcc
|
||||||
|
|
||||||
|
.size __vsyscall_page, 4096
|
||||||
|
29
arch/x86/kernel/vsyscall_trace.h
Normal file
29
arch/x86/kernel/vsyscall_trace.h
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
#undef TRACE_SYSTEM
|
||||||
|
#define TRACE_SYSTEM vsyscall
|
||||||
|
|
||||||
|
#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||||
|
#define __VSYSCALL_TRACE_H
|
||||||
|
|
||||||
|
#include <linux/tracepoint.h>
|
||||||
|
|
||||||
|
TRACE_EVENT(emulate_vsyscall,
|
||||||
|
|
||||||
|
TP_PROTO(int nr),
|
||||||
|
|
||||||
|
TP_ARGS(nr),
|
||||||
|
|
||||||
|
TP_STRUCT__entry(__field(int, nr)),
|
||||||
|
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->nr = nr;
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_printk("nr = %d", __entry->nr)
|
||||||
|
);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#undef TRACE_INCLUDE_PATH
|
||||||
|
#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
|
||||||
|
#define TRACE_INCLUDE_FILE vsyscall_trace
|
||||||
|
#include <trace/define_trace.h>
|
@@ -105,7 +105,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
|
|||||||
* but for now it's good enough to assume that long
|
* but for now it's good enough to assume that long
|
||||||
* mode only uses well known segments or kernel.
|
* mode only uses well known segments or kernel.
|
||||||
*/
|
*/
|
||||||
return (!user_mode(regs)) || (regs->cs == __USER_CS);
|
return (!user_mode(regs) || user_64bit_mode(regs));
|
||||||
#endif
|
#endif
|
||||||
case 0x60:
|
case 0x60:
|
||||||
/* 0x64 thru 0x67 are valid prefixes in all modes. */
|
/* 0x64 thru 0x67 are valid prefixes in all modes. */
|
||||||
@@ -720,6 +720,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
|
|||||||
if (is_errata100(regs, address))
|
if (is_errata100(regs, address))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
/*
|
||||||
|
* Instruction fetch faults in the vsyscall page might need
|
||||||
|
* emulation.
|
||||||
|
*/
|
||||||
|
if (unlikely((error_code & PF_INSTR) &&
|
||||||
|
((address & ~0xfff) == VSYSCALL_START))) {
|
||||||
|
if (emulate_vsyscall(regs, address))
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (unlikely(show_unhandled_signals))
|
if (unlikely(show_unhandled_signals))
|
||||||
show_signal_msg(regs, error_code, address, tsk);
|
show_signal_msg(regs, error_code, address, tsk);
|
||||||
|
|
||||||
|
@@ -9,6 +9,7 @@ __PAGE_ALIGNED_DATA
|
|||||||
vdso_start:
|
vdso_start:
|
||||||
.incbin "arch/x86/vdso/vdso.so"
|
.incbin "arch/x86/vdso/vdso.so"
|
||||||
vdso_end:
|
vdso_end:
|
||||||
|
.align PAGE_SIZE /* extra data here leaks to userspace. */
|
||||||
|
|
||||||
.previous
|
.previous
|
||||||
|
|
||||||
|
@@ -951,6 +951,10 @@ static const struct pv_info xen_info __initconst = {
|
|||||||
.paravirt_enabled = 1,
|
.paravirt_enabled = 1,
|
||||||
.shared_kernel_pmd = 0,
|
.shared_kernel_pmd = 0,
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
.extra_user_64bit_cs = FLAT_USER_CS64,
|
||||||
|
#endif
|
||||||
|
|
||||||
.name = "Xen",
|
.name = "Xen",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -1916,6 +1916,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
|
|||||||
# endif
|
# endif
|
||||||
#else
|
#else
|
||||||
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
|
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
|
||||||
|
case VVAR_PAGE:
|
||||||
#endif
|
#endif
|
||||||
case FIX_TEXT_POKE0:
|
case FIX_TEXT_POKE0:
|
||||||
case FIX_TEXT_POKE1:
|
case FIX_TEXT_POKE1:
|
||||||
@@ -1956,7 +1957,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
|
|||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
/* Replicate changes to map the vsyscall page into the user
|
/* Replicate changes to map the vsyscall page into the user
|
||||||
pagetable vsyscall mapping. */
|
pagetable vsyscall mapping. */
|
||||||
if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
|
if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
|
||||||
|
idx == VVAR_PAGE) {
|
||||||
unsigned long vaddr = __fix_to_virt(idx);
|
unsigned long vaddr = __fix_to_virt(idx);
|
||||||
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
|
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user