Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86-64: move clts into batch cpu state updates when preloading fpu
  x86-64: move unlazy_fpu() into lazy cpu state part of context switch
  x86-32: make sure clts is batched during context switch
  x86: split out core __math_state_restore
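Taken together, the four commits make the context switch decide up front whether the incoming task will want the FPU, clear CR0.TS while the other CPU state updates are still being batched, and only then restore the FPU registers via the newly split-out __math_state_restore(). Below is a minimal, userspace-compilable sketch of that ordering; it is not the kernel code, and struct task, batched_clts(), end_lazy_cpu_updates() and restore_fpu_registers() are hypothetical stand-ins for the real thread state, clts(), arch_end_context_switch() and __math_state_restore().

/* sketch.c - hypothetical userspace illustration, not kernel code */
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the incoming task's FPU bookkeeping. */
struct task {
        bool used_math;            /* task has FPU state to restore */
        unsigned int fpu_counter;  /* consecutive timeslices that used the FPU */
};

/* Stand-ins that only log ordering; the kernel uses clts(),
   arch_end_context_switch() and __math_state_restore() here. */
static void batched_clts(void)          { puts("clts, batched with the other cpu state updates"); }
static void end_lazy_cpu_updates(void)  { puts("flush lazy/batched cpu state updates"); }
static void restore_fpu_registers(void) { puts("reload FPU registers (TS already clear)"); }

static void switch_to_sketch(const struct task *next)
{
        /* Decide up front whether the incoming task is likely to want the FPU. */
        bool preload_fpu = next->used_math && next->fpu_counter > 5;

        /* ...TLS/segment and other per-cpu updates would happen here... */

        if (preload_fpu)
                batched_clts();         /* cheap while updates are still batched */

        end_lazy_cpu_updates();         /* any hypercalls are flushed here */

        if (preload_fpu)
                restore_fpu_registers();
}

int main(void)
{
        struct task fpu_heavy = { .used_math = true, .fpu_counter = 9 };

        switch_to_sketch(&fpu_heavy);
        return 0;
}

The point of the ordering is that clearing TS becomes just another entry in the batched/lazy CPU state updates, and __math_state_restore() can then assume CR0.TS is already clear, as its comment in the diff below states.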
@@ -26,6 +26,7 @@ extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void __math_state_restore(void);
 extern void init_thread_xstate(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
@@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
                              *next = &next_p->thread;
         int cpu = smp_processor_id();
         struct tss_struct *tss = &per_cpu(init_tss, cpu);
+        bool preload_fpu;
 
         /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
+        /*
+         * If the task has used fpu the last 5 timeslices, just do a full
+         * restore of the math state immediately to avoid the trap; the
+         * chances of needing FPU soon are obviously high now
+         */
+        preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+
         __unlazy_fpu(prev_p);
 
-
         /* we're going to use this soon, after a few expensive things */
-        if (next_p->fpu_counter > 5)
+        if (preload_fpu)
                 prefetch(next->xstate);
 
         /*
@@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
                      task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
                 __switch_to_xtra(prev_p, next_p, tss);
 
+        /* If we're going to preload the fpu context, make sure clts
+           is run while we're batching the cpu state updates. */
+        if (preload_fpu)
+                clts();
+
         /*
          * Leave lazy mode, flushing any hypercalls made here.
          * This must be done before restoring TLS segments so
@@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
          */
         arch_end_context_switch(next_p);
 
-        /* If the task has used fpu the last 5 timeslices, just do a full
-         * restore of the math state immediately to avoid the trap; the
-         * chances of needing FPU soon are obviously high now
-         *
-         * tsk_used_math() checks prevent calling math_state_restore(),
-         * which can sleep in the case of !tsk_used_math()
-         */
-        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-                math_state_restore();
+        if (preload_fpu)
+                __math_state_restore();
 
         /*
          * Restore %gs if needed (which is common)
@@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         int cpu = smp_processor_id();
         struct tss_struct *tss = &per_cpu(init_tss, cpu);
         unsigned fsindex, gsindex;
+        bool preload_fpu;
+
+        /*
+         * If the task has used fpu the last 5 timeslices, just do a full
+         * restore of the math state immediately to avoid the trap; the
+         * chances of needing FPU soon are obviously high now
+         */
+        preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
 
         /* we're going to use this soon, after a few expensive things */
-        if (next_p->fpu_counter > 5)
+        if (preload_fpu)
                 prefetch(next->xstate);
 
         /*
@@ -419,6 +427,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
         load_TLS(next, cpu);
 
+        /* Must be after DS reload */
+        unlazy_fpu(prev_p);
+
+        /* Make sure cpu is ready for new context */
+        if (preload_fpu)
+                clts();
+
         /*
          * Leave lazy mode, flushing any hypercalls made here.
          * This must be done before restoring TLS segments so
@@ -459,9 +474,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
                 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
         prev->gsindex = gsindex;
 
-        /* Must be after DS reload */
-        unlazy_fpu(prev_p);
-
         /*
          * Switch the PDA and FPU contexts.
          */
@@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
                      task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                 __switch_to_xtra(prev_p, next_p, tss);
 
-        /* If the task has used fpu the last 5 timeslices, just do a full
-         * restore of the math state immediately to avoid the trap; the
-         * chances of needing FPU soon are obviously high now
-         *
-         * tsk_used_math() checks prevent calling math_state_restore(),
-         * which can sleep in the case of !tsk_used_math()
-         */
-        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-                math_state_restore();
+        /*
+         * Preload the FPU context, now that we've determined that the
+         * task is likely to be using it.
+         */
+        if (preload_fpu)
+                __math_state_restore();
         return prev_p;
 }
 
@@ -794,6 +794,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
 
+/*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use.  Used during context switch.
+ */
+void __math_state_restore(void)
+{
+        struct thread_info *thread = current_thread_info();
+        struct task_struct *tsk = thread->task;
+
+        /*
+         * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+         */
+        if (unlikely(restore_fpu_checking(tsk))) {
+                stts();
+                force_sig(SIGSEGV, tsk);
+                return;
+        }
+
+        thread->status |= TS_USEDFPU;        /* So we fnsave on switch_to() */
+        tsk->fpu_counter++;
+}
+
 /*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
@@ -825,17 +847,8 @@ asmlinkage void math_state_restore(void)
         }
 
         clts();                                /* Allow maths ops (or we recurse) */
-        /*
-         * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-         */
-        if (unlikely(restore_fpu_checking(tsk))) {
-                stts();
-                force_sig(SIGSEGV, tsk);
-                return;
-        }
 
-        thread->status |= TS_USEDFPU;        /* So we fnsave on switch_to() */
-        tsk->fpu_counter++;
+        __math_state_restore();
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 