Merge branch 'xen-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
* 'xen-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen: xfs: eagerly remove vmap mappings to avoid upsetting Xen xen: add some debug output for failed multicalls xen: fix incorrect vcpu_register_vcpu_info hypercall argument xen: ask the hypervisor how much space it needs reserved xen: lock pte pages while pinning/unpinning xen: deal with stale cr3 values when unpinning pagetables xen: add batch completion callbacks xen: yield to IPI target if necessary Clean up duplicate includes in arch/i386/xen/ remove dead code in pgtable_cache_init paravirt: clean up lazy mode handling paravirt: refactor struct paravirt_ops into smaller pv_*_ops
This commit is contained in:
@@ -25,7 +25,6 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/page-flags.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/physdev.h>
|
||||
@@ -52,11 +51,25 @@
|
||||
|
||||
EXPORT_SYMBOL_GPL(hypercall_page);
|
||||
|
||||
DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
|
||||
|
||||
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
|
||||
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
|
||||
DEFINE_PER_CPU(unsigned long, xen_cr3);
|
||||
|
||||
/*
|
||||
* Note about cr3 (pagetable base) values:
|
||||
*
|
||||
* xen_cr3 contains the current logical cr3 value; it contains the
|
||||
* last set cr3. This may not be the current effective cr3, because
|
||||
* its update may be being lazily deferred. However, a vcpu looking
|
||||
* at its own cr3 can use this value knowing that it everything will
|
||||
* be self-consistent.
|
||||
*
|
||||
* xen_current_cr3 contains the actual vcpu cr3; it is set once the
|
||||
* hypercall to set the vcpu cr3 is complete (so it may be a little
|
||||
* out of date, but it will never be set early). If one vcpu is
|
||||
* looking at another vcpu's cr3 value, it should use this variable.
|
||||
*/
|
||||
DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
|
||||
DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
|
||||
|
||||
struct start_info *xen_start_info;
|
||||
EXPORT_SYMBOL_GPL(xen_start_info);
|
||||
@@ -100,7 +113,7 @@ static void __init xen_vcpu_setup(int cpu)
|
||||
info.mfn = virt_to_mfn(vcpup);
|
||||
info.offset = offset_in_page(vcpup);
|
||||
|
||||
printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n",
|
||||
printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
|
||||
cpu, vcpup, info.mfn, info.offset);
|
||||
|
||||
/* Check to see if the hypervisor will put the vcpu_info
|
||||
@@ -124,7 +137,7 @@ static void __init xen_vcpu_setup(int cpu)
|
||||
static void __init xen_banner(void)
|
||||
{
|
||||
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
|
||||
paravirt_ops.name);
|
||||
pv_info.name);
|
||||
printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
|
||||
}
|
||||
|
||||
@@ -249,29 +262,10 @@ static void xen_halt(void)
|
||||
xen_safe_halt();
|
||||
}
|
||||
|
||||
static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
|
||||
static void xen_leave_lazy(void)
|
||||
{
|
||||
BUG_ON(preemptible());
|
||||
|
||||
switch (mode) {
|
||||
case PARAVIRT_LAZY_NONE:
|
||||
BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
|
||||
break;
|
||||
|
||||
case PARAVIRT_LAZY_MMU:
|
||||
case PARAVIRT_LAZY_CPU:
|
||||
BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE);
|
||||
break;
|
||||
|
||||
case PARAVIRT_LAZY_FLUSH:
|
||||
/* flush if necessary, but don't change state */
|
||||
if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE)
|
||||
xen_mc_flush();
|
||||
return;
|
||||
}
|
||||
|
||||
paravirt_leave_lazy(paravirt_get_lazy_mode());
|
||||
xen_mc_flush();
|
||||
x86_write_percpu(xen_lazy_mode, mode);
|
||||
}
|
||||
|
||||
static unsigned long xen_store_tr(void)
|
||||
@@ -358,7 +352,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
|
||||
* loaded properly. This will go away as soon as Xen has been
|
||||
* modified to not save/restore %gs for normal hypercalls.
|
||||
*/
|
||||
if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU)
|
||||
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
|
||||
loadsegment(gs, 0);
|
||||
}
|
||||
|
||||
@@ -632,32 +626,36 @@ static unsigned long xen_read_cr3(void)
|
||||
return x86_read_percpu(xen_cr3);
|
||||
}
|
||||
|
||||
static void set_current_cr3(void *v)
|
||||
{
|
||||
x86_write_percpu(xen_current_cr3, (unsigned long)v);
|
||||
}
|
||||
|
||||
static void xen_write_cr3(unsigned long cr3)
|
||||
{
|
||||
struct mmuext_op *op;
|
||||
struct multicall_space mcs;
|
||||
unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
|
||||
|
||||
BUG_ON(preemptible());
|
||||
|
||||
if (cr3 == x86_read_percpu(xen_cr3)) {
|
||||
/* just a simple tlb flush */
|
||||
xen_flush_tlb();
|
||||
return;
|
||||
}
|
||||
mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */
|
||||
|
||||
/* Update while interrupts are disabled, so its atomic with
|
||||
respect to ipis */
|
||||
x86_write_percpu(xen_cr3, cr3);
|
||||
|
||||
op = mcs.args;
|
||||
op->cmd = MMUEXT_NEW_BASEPTR;
|
||||
op->arg1.mfn = mfn;
|
||||
|
||||
{
|
||||
struct mmuext_op *op;
|
||||
struct multicall_space mcs = xen_mc_entry(sizeof(*op));
|
||||
unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
op = mcs.args;
|
||||
op->cmd = MMUEXT_NEW_BASEPTR;
|
||||
op->arg1.mfn = mfn;
|
||||
/* Update xen_update_cr3 once the batch has actually
|
||||
been submitted. */
|
||||
xen_mc_callback(set_current_cr3, (void *)cr3);
|
||||
|
||||
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
|
||||
|
||||
xen_mc_issue(PARAVIRT_LAZY_CPU);
|
||||
}
|
||||
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
|
||||
}
|
||||
|
||||
/* Early in boot, while setting up the initial pagetable, assume
|
||||
@@ -668,6 +666,15 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
|
||||
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
|
||||
}
|
||||
|
||||
static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
|
||||
{
|
||||
struct mmuext_op op;
|
||||
op.cmd = level;
|
||||
op.arg1.mfn = pfn_to_mfn(pfn);
|
||||
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* This needs to make sure the new pte page is pinned iff its being
|
||||
attached to a pinned pagetable. */
|
||||
static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
|
||||
@@ -677,9 +684,10 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
|
||||
if (PagePinned(virt_to_page(mm->pgd))) {
|
||||
SetPagePinned(page);
|
||||
|
||||
if (!PageHighMem(page))
|
||||
if (!PageHighMem(page)) {
|
||||
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
|
||||
else
|
||||
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
|
||||
} else
|
||||
/* make sure there are no stray mappings of
|
||||
this page */
|
||||
kmap_flush_unused();
|
||||
@@ -692,8 +700,10 @@ static void xen_release_pt(u32 pfn)
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
|
||||
if (PagePinned(page)) {
|
||||
if (!PageHighMem(page))
|
||||
if (!PageHighMem(page)) {
|
||||
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
|
||||
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -738,7 +748,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
|
||||
pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
|
||||
|
||||
/* special set_pte for pagetable initialization */
|
||||
paravirt_ops.set_pte = xen_set_pte_init;
|
||||
pv_mmu_ops.set_pte = xen_set_pte_init;
|
||||
|
||||
init_mm.pgd = base;
|
||||
/*
|
||||
@@ -785,8 +795,8 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
|
||||
{
|
||||
/* This will work as long as patching hasn't happened yet
|
||||
(which it hasn't) */
|
||||
paravirt_ops.alloc_pt = xen_alloc_pt;
|
||||
paravirt_ops.set_pte = xen_set_pte;
|
||||
pv_mmu_ops.alloc_pt = xen_alloc_pt;
|
||||
pv_mmu_ops.set_pte = xen_set_pte;
|
||||
|
||||
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
||||
/*
|
||||
@@ -808,15 +818,15 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
|
||||
/* Actually pin the pagetable down, but we can't set PG_pinned
|
||||
yet because the page structures don't exist yet. */
|
||||
{
|
||||
struct mmuext_op op;
|
||||
unsigned level;
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
op.cmd = MMUEXT_PIN_L3_TABLE;
|
||||
level = MMUEXT_PIN_L3_TABLE;
|
||||
#else
|
||||
op.cmd = MMUEXT_PIN_L3_TABLE;
|
||||
level = MMUEXT_PIN_L2_TABLE;
|
||||
#endif
|
||||
op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base)));
|
||||
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
|
||||
BUG();
|
||||
|
||||
pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -833,12 +843,12 @@ void __init xen_setup_vcpu_info_placement(void)
|
||||
if (have_vcpu_info_placement) {
|
||||
printk(KERN_INFO "Xen: using vcpu_info placement\n");
|
||||
|
||||
paravirt_ops.save_fl = xen_save_fl_direct;
|
||||
paravirt_ops.restore_fl = xen_restore_fl_direct;
|
||||
paravirt_ops.irq_disable = xen_irq_disable_direct;
|
||||
paravirt_ops.irq_enable = xen_irq_enable_direct;
|
||||
paravirt_ops.read_cr2 = xen_read_cr2_direct;
|
||||
paravirt_ops.iret = xen_iret_direct;
|
||||
pv_irq_ops.save_fl = xen_save_fl_direct;
|
||||
pv_irq_ops.restore_fl = xen_restore_fl_direct;
|
||||
pv_irq_ops.irq_disable = xen_irq_disable_direct;
|
||||
pv_irq_ops.irq_enable = xen_irq_enable_direct;
|
||||
pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
|
||||
pv_cpu_ops.iret = xen_iret_direct;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -850,8 +860,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
|
||||
|
||||
start = end = reloc = NULL;
|
||||
|
||||
#define SITE(x) \
|
||||
case PARAVIRT_PATCH(x): \
|
||||
#define SITE(op, x) \
|
||||
case PARAVIRT_PATCH(op.x): \
|
||||
if (have_vcpu_info_placement) { \
|
||||
start = (char *)xen_##x##_direct; \
|
||||
end = xen_##x##_direct_end; \
|
||||
@@ -860,10 +870,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
|
||||
goto patch_site
|
||||
|
||||
switch (type) {
|
||||
SITE(irq_enable);
|
||||
SITE(irq_disable);
|
||||
SITE(save_fl);
|
||||
SITE(restore_fl);
|
||||
SITE(pv_irq_ops, irq_enable);
|
||||
SITE(pv_irq_ops, irq_disable);
|
||||
SITE(pv_irq_ops, save_fl);
|
||||
SITE(pv_irq_ops, restore_fl);
|
||||
#undef SITE
|
||||
|
||||
patch_site:
|
||||
@@ -895,26 +905,32 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||
static const struct pv_info xen_info __initdata = {
|
||||
.paravirt_enabled = 1,
|
||||
.shared_kernel_pmd = 0,
|
||||
|
||||
.name = "Xen",
|
||||
.banner = xen_banner,
|
||||
};
|
||||
|
||||
static const struct pv_init_ops xen_init_ops __initdata = {
|
||||
.patch = xen_patch,
|
||||
|
||||
.banner = xen_banner,
|
||||
.memory_setup = xen_memory_setup,
|
||||
.arch_setup = xen_arch_setup,
|
||||
.init_IRQ = xen_init_IRQ,
|
||||
.post_allocator_init = xen_mark_init_mm_pinned,
|
||||
};
|
||||
|
||||
static const struct pv_time_ops xen_time_ops __initdata = {
|
||||
.time_init = xen_time_init,
|
||||
|
||||
.set_wallclock = xen_set_wallclock,
|
||||
.get_wallclock = xen_get_wallclock,
|
||||
.get_cpu_khz = xen_cpu_khz,
|
||||
.sched_clock = xen_sched_clock,
|
||||
};
|
||||
|
||||
static const struct pv_cpu_ops xen_cpu_ops __initdata = {
|
||||
.cpuid = xen_cpuid,
|
||||
|
||||
.set_debugreg = xen_set_debugreg,
|
||||
@@ -925,22 +941,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||
.read_cr0 = native_read_cr0,
|
||||
.write_cr0 = native_write_cr0,
|
||||
|
||||
.read_cr2 = xen_read_cr2,
|
||||
.write_cr2 = xen_write_cr2,
|
||||
|
||||
.read_cr3 = xen_read_cr3,
|
||||
.write_cr3 = xen_write_cr3,
|
||||
|
||||
.read_cr4 = native_read_cr4,
|
||||
.read_cr4_safe = native_read_cr4_safe,
|
||||
.write_cr4 = xen_write_cr4,
|
||||
|
||||
.save_fl = xen_save_fl,
|
||||
.restore_fl = xen_restore_fl,
|
||||
.irq_disable = xen_irq_disable,
|
||||
.irq_enable = xen_irq_enable,
|
||||
.safe_halt = xen_safe_halt,
|
||||
.halt = xen_halt,
|
||||
.wbinvd = native_wbinvd,
|
||||
|
||||
.read_msr = native_read_msr_safe,
|
||||
@@ -969,6 +973,23 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||
.set_iopl_mask = xen_set_iopl_mask,
|
||||
.io_delay = xen_io_delay,
|
||||
|
||||
.lazy_mode = {
|
||||
.enter = paravirt_enter_lazy_cpu,
|
||||
.leave = xen_leave_lazy,
|
||||
},
|
||||
};
|
||||
|
||||
static const struct pv_irq_ops xen_irq_ops __initdata = {
|
||||
.init_IRQ = xen_init_IRQ,
|
||||
.save_fl = xen_save_fl,
|
||||
.restore_fl = xen_restore_fl,
|
||||
.irq_disable = xen_irq_disable,
|
||||
.irq_enable = xen_irq_enable,
|
||||
.safe_halt = xen_safe_halt,
|
||||
.halt = xen_halt,
|
||||
};
|
||||
|
||||
static const struct pv_apic_ops xen_apic_ops __initdata = {
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
.apic_write = xen_apic_write,
|
||||
.apic_write_atomic = xen_apic_write,
|
||||
@@ -977,6 +998,17 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||
.setup_secondary_clock = paravirt_nop,
|
||||
.startup_ipi_hook = paravirt_nop,
|
||||
#endif
|
||||
};
|
||||
|
||||
static const struct pv_mmu_ops xen_mmu_ops __initdata = {
|
||||
.pagetable_setup_start = xen_pagetable_setup_start,
|
||||
.pagetable_setup_done = xen_pagetable_setup_done,
|
||||
|
||||
.read_cr2 = xen_read_cr2,
|
||||
.write_cr2 = xen_write_cr2,
|
||||
|
||||
.read_cr3 = xen_read_cr3,
|
||||
.write_cr3 = xen_write_cr3,
|
||||
|
||||
.flush_tlb_user = xen_flush_tlb,
|
||||
.flush_tlb_kernel = xen_flush_tlb,
|
||||
@@ -986,9 +1018,6 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||
.pte_update = paravirt_nop,
|
||||
.pte_update_defer = paravirt_nop,
|
||||
|
||||
.pagetable_setup_start = xen_pagetable_setup_start,
|
||||
.pagetable_setup_done = xen_pagetable_setup_done,
|
||||
|
||||
.alloc_pt = xen_alloc_pt_init,
|
||||
.release_pt = xen_release_pt,
|
||||
.alloc_pd = paravirt_nop,
|
||||
@@ -1024,7 +1053,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
|
||||
.dup_mmap = xen_dup_mmap,
|
||||
.exit_mmap = xen_exit_mmap,
|
||||
|
||||
.set_lazy_mode = xen_set_lazy_mode,
|
||||
.lazy_mode = {
|
||||
.enter = paravirt_enter_lazy_mmu,
|
||||
.leave = xen_leave_lazy,
|
||||
},
|
||||
};
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -1080,6 +1112,17 @@ static const struct machine_ops __initdata xen_machine_ops = {
|
||||
};
|
||||
|
||||
|
||||
static void __init xen_reserve_top(void)
|
||||
{
|
||||
unsigned long top = HYPERVISOR_VIRT_START;
|
||||
struct xen_platform_parameters pp;
|
||||
|
||||
if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
|
||||
top = pp.virt_start;
|
||||
|
||||
reserve_top_address(-top + 2 * PAGE_SIZE);
|
||||
}
|
||||
|
||||
/* First C function to be called on Xen boot */
|
||||
asmlinkage void __init xen_start_kernel(void)
|
||||
{
|
||||
@@ -1091,7 +1134,14 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
|
||||
|
||||
/* Install Xen paravirt ops */
|
||||
paravirt_ops = xen_paravirt_ops;
|
||||
pv_info = xen_info;
|
||||
pv_init_ops = xen_init_ops;
|
||||
pv_time_ops = xen_time_ops;
|
||||
pv_cpu_ops = xen_cpu_ops;
|
||||
pv_irq_ops = xen_irq_ops;
|
||||
pv_apic_ops = xen_apic_ops;
|
||||
pv_mmu_ops = xen_mmu_ops;
|
||||
|
||||
machine_ops = xen_machine_ops;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -1113,6 +1163,7 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
/* keep using Xen gdt for now; no urgent need to change it */
|
||||
|
||||
x86_write_percpu(xen_cr3, __pa(pgd));
|
||||
x86_write_percpu(xen_current_cr3, __pa(pgd));
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* Don't do the full vcpu_info placement stuff until we have a
|
||||
@@ -1124,12 +1175,12 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
xen_setup_vcpu_info_placement();
|
||||
#endif
|
||||
|
||||
paravirt_ops.kernel_rpl = 1;
|
||||
pv_info.kernel_rpl = 1;
|
||||
if (xen_feature(XENFEAT_supervisor_mode_kernel))
|
||||
paravirt_ops.kernel_rpl = 0;
|
||||
pv_info.kernel_rpl = 0;
|
||||
|
||||
/* set the limit of our address space */
|
||||
reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE);
|
||||
xen_reserve_top();
|
||||
|
||||
/* set up basic CPUID stuff */
|
||||
cpu_detect(&new_cpu_data);
|
||||
|
Reference in New Issue
Block a user