/*
 * Patch overview (unified diff over cpus.c, cpus.h, hw/kvm/clock.c,
 * kvm-all.c and kvm.h; the text below is whitespace-collapsed).
 *
 * cpus.c, cpus.h
 *   Adds cpu_clean_all_dirty(void): walks the list that starts at first_cpu
 *   and is linked through next_cpu, calling cpu_clean_state() on each entry.
 *
 * kvm-all.c, kvm.h
 *   Adds kvm_cpu_clean_state(), which stores false in env->kvm_vcpu_dirty,
 *   and the static inline wrapper cpu_clean_state(), which forwards to it
 *   only when kvm_enabled() is true and otherwise does nothing.
 *   NOTE(review): the kvm.h prototype names the parameter "cpu" while the
 *   kvm-all.c definition names it "env".
 *
 * hw/kvm/clock.c
 *   - Adds struct pvclock_vcpu_time_info, declared packed; its field sizes
 *     (4 + 4 + 8 + 8 + 4 + 1 + 1 + 2) add up to the 32 bytes stated in the
 *     trailing comment. Presumably this mirrors the layout published by the
 *     kernel; TODO confirm against the kernel header.
 *   - Adds kvmclock_current_nsec():
 *       1. returns 0 when bit 0 of first_cpu's system_time_msr is clear;
 *       2. otherwise reads one pvclock_vcpu_time_info from the address in
 *          system_time_msr with bit 0 masked off;
 *       3. asserts tsc_timestamp <= env->tsc, then takes their difference;
 *       4. shifts the difference right by -tsc_shift when tsc_shift is
 *          negative, left by tsc_shift otherwise;
 *       5. passes the result and tsc_to_system_mul to mulu64() and combines
 *          the outputs as (nsec_lo >> 32) | (nsec_hi << 32), presumably the
 *          128-bit product shifted right by 32; verify against host-utils.h;
 *       6. returns that value plus system_time.
 *   - kvmclock_pre_save(): when s->clock_valid is not set, now calls
 *     cpu_synchronize_all_states() followed by cpu_clean_all_dirty() before
 *     issuing the KVM_GET_CLOCK ioctl. The in-patch comment gives the reason
 *     for the second call.
 *   - kvmclock_post_load(): calls kvmclock_current_nsec(s) and, when the
 *     result is non-zero, overwrites s->clock with it before data.clock is
 *     filled in.
 *
 * Review notes (observations only; nothing below is changed):
 *   - tsc_shift is an int8_t taken from the structure that was just read
 *     from memory and is used directly as a shift count on a uint64_t, with
 *     no check that its magnitude is below 64.
 *   - The assert() in kvmclock_current_nsec() tests a field of that same
 *     freshly read structure.
 *   - The parameter "s" of kvmclock_current_nsec() is never referenced in
 *     its body.
 *   - A context line in kvmclock_pre_save() passes ret to strerror() on the
 *     branch where ret < 0. Presumably kvm_vm_ioctl() returns a negated
 *     errno value, in which case the sign would need flipping; verify.
 *   - The in-patch comment refers to cpu_synchronize_state(),
 *     X86CPU.apic_state and CPUState.vcpu_dirty, whereas the code in this
 *     patch calls cpu_synchronize_all_states() and clears
 *     env->kvm_vcpu_dirty; TODO confirm the names match the target tree.
 */
diff --git a/cpus.c b/cpus.c index 29aced5..e079ee5 100644 --- a/cpus.c +++ b/cpus.c @@ -187,6 +187,15 @@ void cpu_disable_ticks(void) } } +void cpu_clean_all_dirty(void) +{ + CPUArchState *cpu; + + for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) { + cpu_clean_state(cpu); + } +} + /* Correlation between real and virtual time is always going to be fairly approximate, so ignore small variation. When the guest is idle real and virtual time will be aligned in diff --git a/cpus.h b/cpus.h index 3fc1a4a..1ff166b 100644 --- a/cpus.h +++ b/cpus.h @@ -12,6 +12,7 @@ void unplug_vcpu(void *p); void cpu_synchronize_all_states(void); void cpu_synchronize_all_post_reset(void); void cpu_synchronize_all_post_init(void); +void cpu_clean_all_dirty(void); void qtest_clock_warp(int64_t dest); diff --git a/hw/kvm/clock.c b/hw/kvm/clock.c index 824b978..b2bdda4 100644 --- a/hw/kvm/clock.c +++ b/hw/kvm/clock.c @@ -16,6 +16,8 @@ #include "qemu-common.h" #include "sysemu.h" #include "kvm.h" +#include "host-utils.h" +#include "cpus.h" #include "hw/sysbus.h" #include "hw/kvm/clock.h" @@ -28,6 +30,46 @@ typedef struct KVMClockState { bool clock_valid; } KVMClockState; +struct pvclock_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + uint8_t flags; + uint8_t pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +static uint64_t kvmclock_current_nsec(KVMClockState *s) +{ + CPUArchState *env = first_cpu; + uint64_t migration_tsc = env->tsc; + struct pvclock_vcpu_time_info time; + uint64_t delta; + uint64_t nsec_lo; + uint64_t nsec_hi; + uint64_t nsec; + + if (!(env->system_time_msr & 1ULL)) { + /* KVM clock not active */ + return 0; + } + cpu_physical_memory_read((env->system_time_msr & ~1ULL), &time, sizeof(time)); + + assert(time.tsc_timestamp <= migration_tsc); + delta = migration_tsc - time.tsc_timestamp; + if (time.tsc_shift < 0) { + delta >>= -time.tsc_shift; + } else { + delta <<= 
time.tsc_shift; + } + + mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul); + nsec = (nsec_lo >> 32) | (nsec_hi << 32); + return nsec + time.system_time; +} + static void kvmclock_pre_save(void *opaque) { KVMClockState *s = opaque; @@ -37,6 +79,23 @@ static void kvmclock_pre_save(void *opaque) if (s->clock_valid) { return; } + + cpu_synchronize_all_states(); + /* In theory, the cpu_synchronize_all_states() call above wouldn't + * affect the rest of the code, as the VCPU state inside CPUArchState + * is supposed to always match the VCPU state on the kernel side. + * + * In practice, calling cpu_synchronize_state() too soon will load the + * kernel-side APIC state into X86CPU.apic_state too early, APIC state + * won't be reloaded later because CPUState.vcpu_dirty==true, and + * outdated APIC state may be migrated to another host. + * + * The real fix would be to make sure outdated APIC state is read + * from the kernel again when necessary. While this is not fixed, we + * need the cpu_clean_all_dirty() call below. 
+ */ + cpu_clean_all_dirty(); + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); if (ret < 0) { fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); @@ -55,6 +114,12 @@ static int kvmclock_post_load(void *opaque, int version_id) { KVMClockState *s = opaque; struct kvm_clock_data data; + uint64_t time_at_migration = kvmclock_current_nsec(s); + + /* We can't rely on the migrated clock value, just discard it */ + if (time_at_migration) { + s->clock = time_at_migration; + } data.clock = s->clock; data.flags = 0; diff --git a/kvm-all.c b/kvm-all.c index cd2ccbe..692944e 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1547,6 +1547,11 @@ void kvm_cpu_synchronize_post_init(CPUArchState *env) env->kvm_vcpu_dirty = 0; } +void kvm_cpu_clean_state(CPUArchState *env) +{ + env->kvm_vcpu_dirty = false; +} + int kvm_cpu_exec(CPUArchState *env) { struct kvm_run *run = env->kvm_run; diff --git a/kvm.h b/kvm.h index 2a68a52..92a17d8 100644 --- a/kvm.h +++ b/kvm.h @@ -234,6 +234,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, void kvm_cpu_synchronize_state(CPUArchState *env); void kvm_cpu_synchronize_post_reset(CPUArchState *env); void kvm_cpu_synchronize_post_init(CPUArchState *env); +void kvm_cpu_clean_state(CPUArchState *cpu); /* generic hooks - to be moved/refactored once there are more users */ @@ -258,6 +259,12 @@ static inline void cpu_synchronize_post_init(CPUArchState *env) } } +static inline void cpu_clean_state(CPUArchState *env) +{ + if (kvm_enabled()) { + kvm_cpu_clean_state(env); + } +} #if !defined(CONFIG_USER_ONLY) int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,