Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86: Fix atomic64_xxx_cx8() functions
  x86: Fix and improve cmpxchg_double{,_local}()
  x86_64, asm: Optimise fls(), ffs() and fls64()
  x86, bitops: Move fls64.h inside __KERNEL__
  x86: Fix and improve percpu_cmpxchg{8,16}b_double()
  x86: Report cpb and eff_freq_ro flags correctly
  x86/i386: Use less assembly in strlen(), speed things up a bit
  x86: Use the same node_distance for 32 and 64-bit
  x86: Fix rflags in FAKE_STACK_FRAME
  x86: Clean up and extend do_int3()
  x86: Call do_notify_resume() with interrupts enabled
  x86/div64: Add a micro-optimization shortcut if base is power of two
  x86-64: Cleanup some assembly entry points
  x86-64: Slightly shorten line system call entry and exit paths
  x86-64: Reduce amount of redundant code generated for invalidate_interruptNN
  x86-64: Slightly shorten int_ret_from_sys_call
  x86, efi: Convert efi_phys_get_time() args to physical addresses
  x86: Default to vsyscall=emulate
  x86-64: Set siginfo and context on vsyscall emulation faults
  x86: consolidate xchg and xadd macros
  ...
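Two of the hunks below ("Fix rflags in FAKE_STACK_FRAME" and the kernel_thread() change) replace a hard-coded 0x2 with X86_EFLAGS_BIT1, the reserved flags bit that always reads as 1. A minimal user-space sketch of the resulting value; the constant values are copied here as assumptions rather than pulled from a kernel header:

	/* Sketch only: constants assumed to mirror asm/processor-flags.h. */
	#include <stdio.h>

	#define X86_EFLAGS_BIT1 0x00000002UL	/* reserved, always reads as 1 */
	#define X86_EFLAGS_IF   0x00000200UL	/* interrupt enable flag */

	int main(void)
	{
		/* The fake frames now seed EFLAGS with IF plus the always-one bit. */
		unsigned long fake_flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
		printf("fake eflags: %#lx\n", fake_flags);	/* prints 0x202 */
		return 0;
	}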
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = {
 	"100mhzsteps",
 	"hwpstate",
 	"",	/* tsc invariant mapped to constant_tsc */
-		/* nothing */
+	"cpb",		/* core performance boost */
+	"eff_freq_ro",	/* Readonly aperf/mperf */
 };
@@ -625,6 +625,8 @@ work_notifysig:				# deal with pending signals and
 	movl %esp, %eax
 	jne work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace_sig
@@ -638,6 +640,8 @@ work_notifysig_v86:
 #else
 	movl %esp, %eax
 #endif
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace_sig
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64)
 	/*CFI_REL_OFFSET	ss,0*/
 	pushq_cfi %rax /* rsp */
 	CFI_REL_OFFSET	rsp,0
-	pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
+	pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
 	/*CFI_REL_OFFSET	rflags,0*/
 	pushq_cfi $__KERNEL_CS /* cs */
 	/*CFI_REL_OFFSET	cs,0*/
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork)
 	RESTORE_REST

 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	je   int_ret_from_sys_call
+	jz   retint_restore_args

 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
@@ -465,7 +465,7 @@ ENTRY(system_call)
 	 * after the swapgs, so that it can do the swapgs
 	 * for the guest and jump here on syscall.
 	 */
-ENTRY(system_call_after_swapgs)
+GLOBAL(system_call_after_swapgs)

 	movq	%rsp,PER_CPU_VAR(old_rsp)
 	movq	PER_CPU_VAR(kernel_stack),%rsp
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs)
 	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq	%rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	GET_THREAD_INFO(%rcx)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jnz tracesys
 system_call_fastpath:
 	cmpq $__NR_syscall_max,%rax
@@ -496,10 +495,9 @@ ret_from_sys_call:
 	/* edi: flagmask */
 sysret_check:
 	LOCKDEP_SYS_EXIT
-	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl TI_flags(%rcx),%edx
+	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
 	andl %edi,%edx
 	jnz  sysret_careful
 	CFI_REMEMBER_STATE
@@ -583,7 +581,7 @@ sysret_audit:
 	/* Do syscall tracing */
 tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	jz auditsys
 #endif
 	SAVE_REST
@@ -612,8 +610,6 @@ tracesys:
 GLOBAL(int_ret_from_sys_call)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $3,CS-ARGOFFSET(%rsp)
-	je retint_restore_args
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi: mask to check */
 GLOBAL(int_with_check)
@@ -953,6 +949,7 @@ END(common_interrupt)
 ENTRY(\sym)
 	INTR_FRAME
 	pushq_cfi $~(\num)
+.Lcommon_\sym:
 	interrupt \do_sym
 	jmp ret_from_intr
 	CFI_ENDPROC
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi

 #ifdef CONFIG_SMP
-.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+	ALIGN
+	INTR_FRAME
+.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
 	16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 .if NUM_INVALIDATE_TLB_VECTORS > \idx
-apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
-	invalidate_interrupt\idx smp_invalidate_interrupt
+ENTRY(invalidate_interrupt\idx)
+	pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
+	jmp .Lcommon_invalidate_interrupt0
+	CFI_ADJUST_CFA_OFFSET -8
+END(invalidate_interrupt\idx)
 .endif
 .endr
+	CFI_ENDPROC
+apicinterrupt INVALIDATE_TLB_VECTOR_START, \
+	invalidate_interrupt0, smp_invalidate_interrupt
 #endif

 apicinterrupt THRESHOLD_APIC_VECTOR \
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	regs.orig_ax = -1;
 	regs.ip = (unsigned long) kernel_thread_helper;
 	regs.cs = __KERNEL_CS | get_kernel_rpl();
-	regs.flags = X86_EFLAGS_IF | 0x2;
+	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;

 	/* Ok, create the new process.. */
 	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 			== NOTIFY_STOP)
 		return;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
-#ifdef CONFIG_KPROBES
+
 	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
 			== NOTIFY_STOP)
 		return;
-#else
-	if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
-			== NOTIFY_STOP)
-		return;
-#endif

 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
 	.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
 };

-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;

 static int __init vsyscall_setup(char *str)
 {
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr)
 	return nr;
 }

+static bool write_ok_or_segv(unsigned long ptr, size_t size)
+{
+	/*
+	 * XXX: if access_ok, get_user, and put_user handled
+	 * sig_on_uaccess_error, this could go away.
+	 */
+
+	if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
+		siginfo_t info;
+		struct thread_struct *thread = &current->thread;
+
+		thread->error_code	= 6;	/* user fault, no page, write */
+		thread->cr2		= ptr;
+		thread->trap_no		= 14;
+
+		memset(&info, 0, sizeof(info));
+		info.si_signo		= SIGSEGV;
+		info.si_errno		= 0;
+		info.si_code		= SEGV_MAPERR;
+		info.si_addr		= (void __user *)ptr;
+
+		force_sig_info(SIGSEGV, &info, current);
+		return false;
+	} else {
+		return true;
+	}
+}
+
 bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 {
 	struct task_struct *tsk;
 	unsigned long caller;
 	int vsyscall_nr;
+	int prev_sig_on_uaccess_error;
 	long ret;

 	/*
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	if (seccomp_mode(&tsk->seccomp))
 		do_exit(SIGKILL);

+	/*
+	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
+	 * preserve that behavior to make writing exploits harder.
+	 */
+	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
+	current_thread_info()->sig_on_uaccess_error = 1;
+
+	/*
+	 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
+	 * 64-bit, so we don't need to special-case it here.  For all the
+	 * vsyscalls, 0 means "don't write anything" not "write it at
+	 * address 0".
+	 */
+	ret = -EFAULT;
 	switch (vsyscall_nr) {
 	case 0:
+		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+		    !write_ok_or_segv(regs->si, sizeof(struct timezone)))
+			break;
+
 		ret = sys_gettimeofday(
 			(struct timeval __user *)regs->di,
 			(struct timezone __user *)regs->si);
 		break;

 	case 1:
+		if (!write_ok_or_segv(regs->di, sizeof(time_t)))
+			break;
+
 		ret = sys_time((time_t __user *)regs->di);
 		break;

 	case 2:
+		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
+		    !write_ok_or_segv(regs->si, sizeof(unsigned)))
+			break;
+
 		ret = sys_getcpu((unsigned __user *)regs->di,
 				 (unsigned __user *)regs->si,
 				 0);
 		break;
 	}

+	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+
 	if (ret == -EFAULT) {
-		/*
-		 * Bad news -- userspace fed a bad pointer to a vsyscall.
-		 *
-		 * With a real vsyscall, that would have caused SIGSEGV.
-		 * To make writing reliable exploits using the emulated
-		 * vsyscalls harder, generate SIGSEGV here as well.
-		 */
+		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall fault (exploit attempt?)");
-		goto sigsegv;
+
+		/*
+		 * If we failed to generate a signal for any reason,
+		 * generate one here. (This should be impossible.)
+		 */
+		if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
+				 !sigismember(&tsk->pending.signal, SIGSEGV)))
+			goto sigsegv;
+
+		return true;  /* Don't emulate the ret. */
 	}

 	regs->ax = ret;
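The emulation path above services the three legacy vsyscalls at their fixed addresses. A hedged user-space sketch of how those entry points are reached; the 0xffffffffff600000 + 0x400*nr layout and the expectation that the kernel still maps the vsyscall page (vsyscall=emulate or =native) are assumptions here, not part of the diff:

	/* Sketch: call the legacy fixed-address vsyscall entries directly.
	 * Assumes x86-64 with the vsyscall page mapped; under vsyscall=emulate
	 * each call traps and is serviced by emulate_vsyscall() above. */
	#include <stdio.h>
	#include <sys/time.h>
	#include <time.h>

	typedef int (*vgtod_fn)(struct timeval *, struct timezone *);
	typedef time_t (*vtime_fn)(time_t *);

	int main(void)
	{
		vgtod_fn vgettimeofday = (vgtod_fn)0xffffffffff600000UL;	/* nr 0 */
		vtime_fn vtime = (vtime_fn)0xffffffffff600400UL;		/* nr 1 */
		struct timeval tv;

		if (vgettimeofday(&tv, NULL) == 0)
			printf("gettimeofday via vsyscall: %ld s\n", (long)tv.tv_sec);
		printf("time via vsyscall: %ld\n", (long)vtime(NULL));
		return 0;
	}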