Merge branch 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (84 commits)
  KVM: VMX: Fix comparison of guest efer with stale host value
  KVM: s390: Fix prefix register checking in arch/s390/kvm/sigp.c
  KVM: Drop user return notifier when disabling virtualization on a cpu
  KVM: VMX: Disable unrestricted guest when EPT disabled
  KVM: x86 emulator: limit instructions to 15 bytes
  KVM: s390: Make psw available on all exits, not just a subset
  KVM: x86: Add KVM_GET/SET_VCPU_EVENTS
  KVM: VMX: Report unexpected simultaneous exceptions as internal errors
  KVM: Allow internal errors reported to userspace to carry extra data
  KVM: Reorder IOCTLs in main kvm.h
  KVM: x86: Polish exception injection via KVM_SET_GUEST_DEBUG
  KVM: only clear irq_source_id if irqchip is present
  KVM: x86: disallow KVM_{SET,GET}_LAPIC without allocated in-kernel lapic
  KVM: x86: disallow multiple KVM_CREATE_IRQCHIP
  KVM: VMX: Remove vmx->msr_offset_efer
  KVM: MMU: update invlpg handler comment
  KVM: VMX: move CR3/PDPTR update to vmx_set_cr3
  KVM: remove duplicated task_switch check
  KVM: powerpc: Fix BUILD_BUG_ON condition
  KVM: VMX: Use shared msr infrastructure
  ...

Trivial conflicts due to new Kconfig options in arch/Kconfig and kernel/Makefile
This commit is contained in:
Linus Torvalds
2009-12-08 08:02:38 -08:00
45 changed files with 2936 additions and 1579 deletions

View File

@ -37,6 +37,7 @@
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <trace/events/kvm.h>
#undef TRACE_INCLUDE_FILE
#define CREATE_TRACE_POINTS
@ -88,6 +89,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
int ignore_msrs = 0;
module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
int nr;
struct kvm_shared_msr {
u32 msr;
u64 value;
} msrs[KVM_NR_SHARED_MSRS];
};
struct kvm_shared_msrs {
struct user_return_notifier urn;
bool registered;
u64 current_value[KVM_NR_SHARED_MSRS];
};
static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
{ "pf_guest", VCPU_STAT(pf_guest) },
@ -124,6 +144,72 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
static void kvm_on_user_return(struct user_return_notifier *urn)
{
unsigned slot;
struct kvm_shared_msr *global;
struct kvm_shared_msrs *locals
= container_of(urn, struct kvm_shared_msrs, urn);
for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
global = &shared_msrs_global.msrs[slot];
if (global->value != locals->current_value[slot]) {
wrmsrl(global->msr, global->value);
locals->current_value[slot] = global->value;
}
}
locals->registered = false;
user_return_notifier_unregister(urn);
}
void kvm_define_shared_msr(unsigned slot, u32 msr)
{
int cpu;
u64 value;
if (slot >= shared_msrs_global.nr)
shared_msrs_global.nr = slot + 1;
shared_msrs_global.msrs[slot].msr = msr;
rdmsrl_safe(msr, &value);
shared_msrs_global.msrs[slot].value = value;
for_each_online_cpu(cpu)
per_cpu(shared_msrs, cpu).current_value[slot] = value;
}
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
static void kvm_shared_msr_cpu_online(void)
{
unsigned i;
struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
for (i = 0; i < shared_msrs_global.nr; ++i)
locals->current_value[i] = shared_msrs_global.msrs[i].value;
}
void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
if (((value ^ smsr->current_value[slot]) & mask) == 0)
return;
smsr->current_value[slot] = value;
wrmsrl(shared_msrs_global.msrs[slot].msr, value);
if (!smsr->registered) {
smsr->urn.on_user_return = kvm_on_user_return;
user_return_notifier_register(&smsr->urn);
smsr->registered = true;
}
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
static void drop_user_return_notifiers(void *ignore)
{
struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
if (smsr->registered)
kvm_on_user_return(&smsr->urn);
}
unsigned long segment_base(u16 selector)
{
struct descriptor_table gdt;
@ -485,16 +571,19 @@ static inline u32 bit(int bitno)
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
*
* This list is modified at module load time to reflect the
* capabilities of the host cpu.
* capabilities of the host cpu. This capabilities test skips MSRs that are
* kvm-specific. Those are put in the beginning of the list.
*/
#define KVM_SAVE_MSRS_BEGIN 2
static u32 msrs_to_save[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_K6_STAR,
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
@ -678,7 +767,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
/* With all the info we got, fill in the values */
vcpu->hv_clock.system_time = ts.tv_nsec +
(NSEC_PER_SEC * (u64)ts.tv_sec);
(NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
/*
* The interface expects us to write an even number signaling that the
* update is finished. Since the guest won't see the intermediate
@ -836,6 +926,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return 0;
}
static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
{
struct kvm *kvm = vcpu->kvm;
int lm = is_long_mode(vcpu);
u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
: (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
: kvm->arch.xen_hvm_config.blob_size_32;
u32 page_num = data & ~PAGE_MASK;
u64 page_addr = data & PAGE_MASK;
u8 *page;
int r;
r = -E2BIG;
if (page_num >= blob_size)
goto out;
r = -ENOMEM;
page = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!page)
goto out;
r = -EFAULT;
if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
goto out_free;
if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
goto out_free;
r = 0;
out_free:
kfree(page);
out:
return r;
}
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
switch (msr) {
@ -951,6 +1073,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
"0x%x data 0x%llx\n", msr, data);
break;
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
msr, data);
@ -1225,6 +1349,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PIT2:
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
case KVM_CAP_XEN_HVM:
case KVM_CAP_ADJUST_CLOCK:
case KVM_CAP_VCPU_EVENTS:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@ -1239,8 +1366,8 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_NR_MEMSLOTS:
r = KVM_MEMORY_SLOTS;
break;
case KVM_CAP_PV_MMU:
r = !tdp_enabled;
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
break;
case KVM_CAP_IOMMU:
r = iommu_found();
@ -1327,6 +1454,12 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
unsigned long khz = cpufreq_quick_get(cpu);
if (!khz)
khz = tsc_khz;
per_cpu(cpu_tsc_khz, cpu) = khz;
}
kvm_request_guest_time_update(vcpu);
}
@ -1760,6 +1893,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
return 0;
}
static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
vcpu_load(vcpu);
events->exception.injected = vcpu->arch.exception.pending;
events->exception.nr = vcpu->arch.exception.nr;
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
events->exception.error_code = vcpu->arch.exception.error_code;
events->interrupt.injected = vcpu->arch.interrupt.pending;
events->interrupt.nr = vcpu->arch.interrupt.nr;
events->interrupt.soft = vcpu->arch.interrupt.soft;
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending;
events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
events->sipi_vector = vcpu->arch.sipi_vector;
events->flags = 0;
vcpu_put(vcpu);
}
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
if (events->flags)
return -EINVAL;
vcpu_load(vcpu);
vcpu->arch.exception.pending = events->exception.injected;
vcpu->arch.exception.nr = events->exception.nr;
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
vcpu->arch.exception.error_code = events->exception.error_code;
vcpu->arch.interrupt.pending = events->interrupt.injected;
vcpu->arch.interrupt.nr = events->interrupt.nr;
vcpu->arch.interrupt.soft = events->interrupt.soft;
if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
kvm_pic_clear_isr_ack(vcpu->kvm);
vcpu->arch.nmi_injected = events->nmi.injected;
vcpu->arch.nmi_pending = events->nmi.pending;
kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
vcpu->arch.sipi_vector = events->sipi_vector;
vcpu_put(vcpu);
return 0;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@ -1770,6 +1958,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
switch (ioctl) {
case KVM_GET_LAPIC: {
r = -EINVAL;
if (!vcpu->arch.apic)
goto out;
lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
r = -ENOMEM;
@ -1785,6 +1976,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SET_LAPIC: {
r = -EINVAL;
if (!vcpu->arch.apic)
goto out;
lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
r = -ENOMEM;
if (!lapic)
@ -1911,6 +2105,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
break;
}
case KVM_GET_VCPU_EVENTS: {
struct kvm_vcpu_events events;
kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
r = -EFAULT;
if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
break;
r = 0;
break;
}
case KVM_SET_VCPU_EVENTS: {
struct kvm_vcpu_events events;
r = -EFAULT;
if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
break;
r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
break;
}
default:
r = -EINVAL;
}
@ -2039,9 +2254,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_IOAPIC:
memcpy(&chip->chip.ioapic,
ioapic_irqchip(kvm),
sizeof(struct kvm_ioapic_state));
r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
@ -2071,11 +2284,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
spin_unlock(&pic_irqchip(kvm)->lock);
break;
case KVM_IRQCHIP_IOAPIC:
mutex_lock(&kvm->irq_lock);
memcpy(ioapic_irqchip(kvm),
&chip->chip.ioapic,
sizeof(struct kvm_ioapic_state));
mutex_unlock(&kvm->irq_lock);
r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
@ -2183,7 +2392,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
int r = -EINVAL;
int r = -ENOTTY;
/*
* This union makes it completely explicit to gcc-3.x
* that these two variables' stack usage should be
@ -2245,25 +2454,39 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (r)
goto out;
break;
case KVM_CREATE_IRQCHIP:
case KVM_CREATE_IRQCHIP: {
struct kvm_pic *vpic;
mutex_lock(&kvm->lock);
r = -EEXIST;
if (kvm->arch.vpic)
goto create_irqchip_unlock;
r = -ENOMEM;
kvm->arch.vpic = kvm_create_pic(kvm);
if (kvm->arch.vpic) {
vpic = kvm_create_pic(kvm);
if (vpic) {
r = kvm_ioapic_init(kvm);
if (r) {
kfree(kvm->arch.vpic);
kvm->arch.vpic = NULL;
goto out;
kfree(vpic);
goto create_irqchip_unlock;
}
} else
goto out;
goto create_irqchip_unlock;
smp_wmb();
kvm->arch.vpic = vpic;
smp_wmb();
r = kvm_setup_default_irq_routing(kvm);
if (r) {
mutex_lock(&kvm->irq_lock);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
goto out;
kvm->arch.vpic = NULL;
kvm->arch.vioapic = NULL;
mutex_unlock(&kvm->irq_lock);
}
create_irqchip_unlock:
mutex_unlock(&kvm->lock);
break;
}
case KVM_CREATE_PIT:
u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
goto create_pit;
@ -2293,10 +2516,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
if (irqchip_in_kernel(kvm)) {
__s32 status;
mutex_lock(&kvm->irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
mutex_unlock(&kvm->irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status;
if (copy_to_user(argp, &irq_event,
@ -2422,6 +2643,55 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
case KVM_XEN_HVM_CONFIG: {
r = -EFAULT;
if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
sizeof(struct kvm_xen_hvm_config)))
goto out;
r = -EINVAL;
if (kvm->arch.xen_hvm_config.flags)
goto out;
r = 0;
break;
}
case KVM_SET_CLOCK: {
struct timespec now;
struct kvm_clock_data user_ns;
u64 now_ns;
s64 delta;
r = -EFAULT;
if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
goto out;
r = -EINVAL;
if (user_ns.flags)
goto out;
r = 0;
ktime_get_ts(&now);
now_ns = timespec_to_ns(&now);
delta = user_ns.clock - now_ns;
kvm->arch.kvmclock_offset = delta;
break;
}
case KVM_GET_CLOCK: {
struct timespec now;
struct kvm_clock_data user_ns;
u64 now_ns;
ktime_get_ts(&now);
now_ns = timespec_to_ns(&now);
user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
user_ns.flags = 0;
r = -EFAULT;
if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
goto out;
r = 0;
break;
}
default:
;
}
@ -2434,7 +2704,8 @@ static void kvm_init_msr_list(void)
u32 dummy[2];
unsigned i, j;
for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
/* skip the first msrs in the list. KVM-specific */
for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
continue;
if (j < i)
@ -2758,13 +3029,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
}
int emulate_instruction(struct kvm_vcpu *vcpu,
struct kvm_run *run,
unsigned long cr2,
u16 error_code,
int emulation_type)
{
int r, shadow_mask;
struct decode_cache *c;
struct kvm_run *run = vcpu->run;
kvm_clear_exception_queue(vcpu);
vcpu->arch.mmio_fault_cr2 = cr2;
@ -2784,7 +3055,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
vcpu->arch.emulate_ctxt.vcpu = vcpu;
vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
vcpu->arch.emulate_ctxt.mode =
(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
? X86EMUL_MODE_REAL : cs_l
@ -2862,7 +3133,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
return EMULATE_DO_MMIO;
}
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
if (vcpu->mmio_is_write) {
vcpu->mmio_needed = 0;
@ -2970,8 +3241,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
return r;
}
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned port)
int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
{
unsigned long val;
@ -3000,7 +3270,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio);
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port)
{
@ -3073,9 +3343,6 @@ static void bounce_off(void *info)
/* nothing */
}
static unsigned int ref_freq;
static unsigned long tsc_khz_ref;
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@ -3084,14 +3351,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
struct kvm_vcpu *vcpu;
int i, send_ipi = 0;
if (!ref_freq)
ref_freq = freq->old;
if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
return 0;
if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
return 0;
per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
@ -3128,9 +3392,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
.notifier_call = kvmclock_cpufreq_notifier
};
static void kvm_timer_init(void)
{
int cpu;
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
for_each_online_cpu(cpu) {
unsigned long khz = cpufreq_get(cpu);
if (!khz)
khz = tsc_khz;
per_cpu(cpu_tsc_khz, cpu) = khz;
}
} else {
for_each_possible_cpu(cpu)
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
}
}
int kvm_arch_init(void *opaque)
{
int r, cpu;
int r;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
@ -3162,13 +3445,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
for_each_possible_cpu(cpu)
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
tsc_khz_ref = tsc_khz;
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
kvm_timer_init();
return 0;
@ -3296,7 +3573,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long *rflags)
{
kvm_lmsw(vcpu, msw);
*rflags = kvm_x86_ops->get_rflags(vcpu);
*rflags = kvm_get_rflags(vcpu);
}
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
@ -3334,7 +3611,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
switch (cr) {
case 0:
kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
*rflags = kvm_x86_ops->get_rflags(vcpu);
*rflags = kvm_get_rflags(vcpu);
break;
case 2:
vcpu->arch.cr2 = val;
@ -3454,18 +3731,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
*
* No need to exit to userspace if we already have an interrupt queued.
*/
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
{
return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
kvm_run->request_interrupt_window &&
vcpu->run->request_interrupt_window &&
kvm_arch_interrupt_allowed(vcpu));
}
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
static void post_kvm_run_save(struct kvm_vcpu *vcpu)
{
kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
struct kvm_run *kvm_run = vcpu->run;
kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
@ -3526,7 +3803,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
}
static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static void inject_pending_event(struct kvm_vcpu *vcpu)
{
/* try to reinject previous events if any */
if (vcpu->arch.exception.pending) {
@ -3562,11 +3839,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
}
static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
kvm_run->request_interrupt_window;
vcpu->run->request_interrupt_window;
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@ -3587,12 +3864,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_x86_ops->tlb_flush(vcpu);
if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
&vcpu->requests)) {
kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
goto out;
}
if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
r = 0;
goto out;
}
@ -3616,7 +3893,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
inject_pending_event(vcpu, kvm_run);
inject_pending_event(vcpu);
/* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
@ -3642,7 +3919,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
trace_kvm_entry(vcpu->vcpu_id);
kvm_x86_ops->run(vcpu, kvm_run);
kvm_x86_ops->run(vcpu);
/*
* If the guest has used debug registers, at least dr7
@ -3684,13 +3961,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_lapic_sync_from_vapic(vcpu);
r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
r = kvm_x86_ops->handle_exit(vcpu);
out:
return r;
}
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
@ -3710,7 +3987,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
r = 1;
while (r > 0) {
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
r = vcpu_enter_guest(vcpu, kvm_run);
r = vcpu_enter_guest(vcpu);
else {
up_read(&vcpu->kvm->slots_lock);
kvm_vcpu_block(vcpu);
@ -3738,14 +4015,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
if (dm_request_for_irq_injection(vcpu, kvm_run)) {
if (dm_request_for_irq_injection(vcpu)) {
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
}
if (signal_pending(current)) {
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
}
if (need_resched()) {
@ -3756,7 +4033,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
up_read(&vcpu->kvm->slots_lock);
post_kvm_run_save(vcpu, kvm_run);
post_kvm_run_save(vcpu);
vapic_exit(vcpu);
@ -3789,15 +4066,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (r)
goto out;
}
#if CONFIG_HAS_IOMEM
if (vcpu->mmio_needed) {
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0;
down_read(&vcpu->kvm->slots_lock);
r = emulate_instruction(vcpu, kvm_run,
vcpu->arch.mmio_fault_cr2, 0,
r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
EMULTYPE_NO_DECODE);
up_read(&vcpu->kvm->slots_lock);
if (r == EMULATE_DO_MMIO) {
@ -3808,12 +4083,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
}
#endif
if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
kvm_register_write(vcpu, VCPU_REGS_RAX,
kvm_run->hypercall.ret);
r = __vcpu_run(vcpu, kvm_run);
r = __vcpu_run(vcpu);
out:
if (vcpu->sigset_active)
@ -3847,13 +4121,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
#endif
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_x86_ops->get_rflags(vcpu);
/*
* Don't leak debug flags in case they were set for guest debugging
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
regs->rflags = kvm_get_rflags(vcpu);
vcpu_put(vcpu);
@ -3881,12 +4149,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
#endif
kvm_rip_write(vcpu, regs->rip);
kvm_x86_ops->set_rflags(vcpu, regs->rflags);
kvm_set_rflags(vcpu, regs->rflags);
vcpu->arch.exception.pending = false;
@ -4105,7 +4371,7 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
{
return (seg != VCPU_SREG_LDTR) &&
(seg != VCPU_SREG_TR) &&
(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM);
(kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
}
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
@ -4133,7 +4399,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
{
tss->cr3 = vcpu->arch.cr3;
tss->eip = kvm_rip_read(vcpu);
tss->eflags = kvm_x86_ops->get_rflags(vcpu);
tss->eflags = kvm_get_rflags(vcpu);
tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@ -4157,7 +4423,7 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
kvm_set_cr3(vcpu, tss->cr3);
kvm_rip_write(vcpu, tss->eip);
kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
kvm_set_rflags(vcpu, tss->eflags | 2);
kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
@ -4195,7 +4461,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
struct tss_segment_16 *tss)
{
tss->ip = kvm_rip_read(vcpu);
tss->flag = kvm_x86_ops->get_rflags(vcpu);
tss->flag = kvm_get_rflags(vcpu);
tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@ -4210,14 +4476,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
}
static int load_state_from_tss16(struct kvm_vcpu *vcpu,
struct tss_segment_16 *tss)
{
kvm_rip_write(vcpu, tss->ip);
kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
kvm_set_rflags(vcpu, tss->flag | 2);
kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
@ -4363,15 +4628,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
}
if (reason == TASK_SWITCH_IRET) {
u32 eflags = kvm_x86_ops->get_rflags(vcpu);
kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
u32 eflags = kvm_get_rflags(vcpu);
kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
}
/* set back link to prev task only if NT bit is set in eflags
note that old_tss_sel is not used afetr this point */
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
old_tss_sel = 0xffff;
/* set back link to prev task only if NT bit is set in eflags
note that old_tss_sel is not used afetr this point */
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
@ -4385,8 +4645,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
old_tss_base, &nseg_desc);
if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
u32 eflags = kvm_x86_ops->get_rflags(vcpu);
kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
u32 eflags = kvm_get_rflags(vcpu);
kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
}
if (reason != TASK_SWITCH_IRET) {
@ -4438,8 +4698,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
if (!is_long_mode(vcpu) && is_pae(vcpu))
if (!is_long_mode(vcpu) && is_pae(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.cr3);
mmu_reset_needed = 1;
}
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
@ -4480,12 +4742,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
unsigned long rflags;
int i, r;
vcpu_load(vcpu);
if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
goto unlock_out;
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
kvm_queue_exception(vcpu, DB_VECTOR);
else
kvm_queue_exception(vcpu, BP_VECTOR);
}
/*
* Read rflags as long as potentially injected trace flags are still
* filtered out.
*/
rflags = kvm_get_rflags(vcpu);
vcpu->guest_debug = dbg->control;
if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
vcpu->guest_debug = 0;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
for (i = 0; i < KVM_NR_DB_REGS; ++i)
vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
vcpu->arch.switch_db_regs =
@ -4496,13 +4778,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
}
r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
vcpu->arch.singlestep_cs =
get_segment_selector(vcpu, VCPU_SREG_CS);
vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
}
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
kvm_queue_exception(vcpu, DB_VECTOR);
else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
kvm_queue_exception(vcpu, BP_VECTOR);
/*
* Trigger an rflags update that will inject or remove the trace
* flags.
*/
kvm_set_rflags(vcpu, rflags);
kvm_x86_ops->set_guest_debug(vcpu, dbg);
r = 0;
unlock_out:
vcpu_put(vcpu);
return r;
@ -4703,14 +4995,26 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
return kvm_x86_ops->vcpu_reset(vcpu);
}
void kvm_arch_hardware_enable(void *garbage)
int kvm_arch_hardware_enable(void *garbage)
{
kvm_x86_ops->hardware_enable(garbage);
/*
* Since this may be called from a hotplug notifcation,
* we can't get the CPU frequency directly.
*/
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
int cpu = raw_smp_processor_id();
per_cpu(cpu_tsc_khz, cpu) = 0;
}
kvm_shared_msr_cpu_online();
return kvm_x86_ops->hardware_enable(garbage);
}
void kvm_arch_hardware_disable(void *garbage)
{
kvm_x86_ops->hardware_disable(garbage);
drop_user_return_notifiers(garbage);
}
int kvm_arch_hardware_setup(void)
@ -4948,8 +5252,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
return kvm_x86_ops->interrupt_allowed(vcpu);
}
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
unsigned long rflags;
rflags = kvm_x86_ops->get_rflags(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF);
return rflags;
}
EXPORT_SYMBOL_GPL(kvm_get_rflags);
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
vcpu->arch.singlestep_cs ==
get_segment_selector(vcpu, VCPU_SREG_CS) &&
vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
kvm_x86_ops->set_rflags(vcpu, rflags);
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);