diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./Config.mk xen-4.4.2/./Config.mk
--- xen-4.4.1/./Config.mk 2015-07-20 11:32:57.000000000 +0200
+++ xen-4.4.2/./Config.mk 2015-07-20 11:39:03.487092159 +0200
@@ -218,6 +218,8 @@ XEN_EXTFILES_URL ?= http://xenbits.xen.o
 # the internet. The original download URL is preserved as a comment
 # near the place in the Xen Makefiles where the file is used.
 
+QEMU_UPSTREAM_URL ?= $(XEN_ROOT)/tools/qemu-xen
+
 ifeq ($(GIT_HTTP),y)
 QEMU_REMOTE ?= http://xenbits.xen.org/git-http/qemu-xen-4.4-testing.git
 else
@@ -234,7 +236,7 @@ QEMU_UPSTREAM_URL ?= git://xenbits.xen.o
 SEABIOS_UPSTREAM_URL ?= git://xenbits.xen.org/seabios.git
 endif
 OVMF_UPSTREAM_REVISION ?= 447d264115c476142f884af0be287622cd244423
-QEMU_UPSTREAM_REVISION ?= qemu-xen-4.4.1
+QEMU_UPSTREAM_REVISION ?= qemu-xen-4.4.2
 SEABIOS_UPSTREAM_TAG ?= rel-1.7.3.1
 # Fri Aug 2 14:12:09 2013 -0400
 # Fix bug in CBFS file walking with compressed files.
@@ -244,11 +246,12 @@ ETHERBOOT_NICS ?= rtl8139 e1000
 # Specify which qemu-dm to use. This may be `ioemu' to use the old
 # Mercurial in-tree version, or a local directory, or a git URL.
 # CONFIG_QEMU ?= `pwd`/$(XEN_ROOT)/../qemu-xen.git
-CONFIG_QEMU ?= $(QEMU_REMOTE)
+# CONFIG_QEMU ?= $(QEMU_REMOTE)
+CONFIG_QEMU ?= $(XEN_ROOT)/tools/qemu-xen-traditional
 
-QEMU_TAG ?= xen-4.4.1
-# Tue Apr 8 16:50:06 2014 +0000
-# qemu-xen-trad: free all the pirqs for msi/msix when driver unloads
+QEMU_TAG ?= xen-4.4.2
+# Mon Feb 2 16:49:59 2015 +0000
+# cirrus: fix an uninitialized variable
 
 # Short answer -- do not enable this unless you know what you are
 # doing and are prepared for some pain.
diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./docs/misc/xen-command-line.markdown xen-4.4.2/./docs/misc/xen-command-line.markdown
--- xen-4.4.1/./docs/misc/xen-command-line.markdown 2014-09-02 08:20:19.000000000 +0200
+++ xen-4.4.2/./docs/misc/xen-command-line.markdown 2015-03-19 16:06:48.000000000 +0100
@@ -167,10 +167,13 @@ developers wishing Xen to fall back to o
 ### ats
 > `= <boolean>`
 
-> Default: `true`
+> Default: `false`
+
+Permits Xen to set up and use PCI Address Translation Services. This is a
+performance optimisation for PCI Passthrough.
 
-Permits Xen to set up and use PCI Address Translation Services, which
-is required for PCI Passthrough.
+**WARNING: Xen cannot currently safely use ATS because of its synchronous wait
+loops for Queued Invalidation completions.**
 
 ### availmem
 > `= <size>`
@@ -494,6 +497,13 @@ Either force retrieval of monitor EDID i
 disable it (edid=no). This option should not normally be required
 except for debugging purposes.
 
+### efi-rs
+> `= <boolean>`
+
+> Default: `true`
+
+Force or disable use of EFI runtime services.
+
 ### extra\_guest\_irqs
 > `= [<domU number>][,<dom0 number>]`
 
@@ -1005,17 +1015,21 @@ wrong behaviour (see handle\_pmc\_quirk(
 If 'vpmu=bts' is specified the virtualisation of the Branch Trace Store (BTS)
 feature is switched on on Intel processors supporting this feature.
 
+Note that if **watchdog** option is also specified vpmu will be turned off.
+
 *Warning:*
 As the BTS virtualisation is not 100% safe and because of the nehalem quirk
 don't use the vpmu flag on production systems with Intel cpus!
 
 ### watchdog
-> `= <boolean>`
+> `= force | <boolean>`
 
 > Default: `false`
 
 Run an NMI watchdog on each processor. If a processor is stuck for
-longer than the **watchdog\_timeout**, a panic occurs.
+longer than the **watchdog\_timeout**, a panic occurs. When `force` is
+specified, in addition to running an NMI watchdog on each processor,
+unknown NMIs will still be processed.
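As a usage illustration for the boot options touched above (a hypothetical dom0 GRUB entry; the kernel path and the timeout value are placeholders, only the option names `watchdog`, `watchdog_timeout` and `efi-rs` come from this documentation):

    multiboot /boot/xen.gz watchdog=force watchdog_timeout=10 efi-rs=false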
 ### watchdog\_timeout
 > `= <integer>`
@@ -1035,9 +1049,11 @@ Permit use of x2apic setup for SMP envir
 ### x2apic\_phys
 > `= <boolean>`
 
-> Default: `true`
+> Default: `true` if **FADT** mandates physical mode, `false` otherwise.
 
-Use the x2apic physical apic driver. The alternative is the x2apic cluster driver.
+In the case that x2apic is in use, this option switches between physical and
+clustered mode. The default, given no hint from the **FADT**, is cluster
+mode.
 
 ### xsave
 > `= <boolean>`
diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./qemu/hw/cirrus_vga.c xen-4.4.2/./qemu/hw/cirrus_vga.c
--- xen-4.4.1/./qemu/hw/cirrus_vga.c 2014-07-02 16:54:37.000000000 +0200
+++ xen-4.4.2/./qemu/hw/cirrus_vga.c 2015-02-16 15:51:17.000000000 +0100
@@ -1658,8 +1658,8 @@ cirrus_hook_read_cr(CirrusVGAState * s,
     default:
 #ifdef DEBUG_CIRRUS
 	printf("cirrus: inport cr_index %02x\n", reg_index);
-	*reg_value = 0xff;
 #endif
+	*reg_value = 0xff;
 	break;
     }
 
diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./tools/libxc/xc_linux_osdep.c xen-4.4.2/./tools/libxc/xc_linux_osdep.c
--- xen-4.4.1/./tools/libxc/xc_linux_osdep.c 2014-09-02 08:20:19.000000000 +0200
+++ xen-4.4.2/./tools/libxc/xc_linux_osdep.c 2015-03-19 16:06:48.000000000 +0100
@@ -92,14 +92,32 @@ static void *linux_privcmd_alloc_hyperca
 {
     size_t size = npages * XC_PAGE_SIZE;
     void *p;
+    int rc, saved_errno;
 
     /* Address returned by mmap is page aligned. */
     p = mmap(NULL, size, PROT_READ|PROT_WRITE,
             MAP_PRIVATE|MAP_ANONYMOUS|MAP_LOCKED, -1, 0);
+    if ( p == MAP_FAILED )
+    {
+        PERROR("xc_alloc_hypercall_buffer: mmap failed");
+        return NULL;
+    }
 
    /* Do not copy the VMA to child process on fork. Avoid the page being COW
       on hypercall. */
-    madvise(p, npages * XC_PAGE_SIZE, MADV_DONTFORK);
+    rc = madvise(p, npages * XC_PAGE_SIZE, MADV_DONTFORK);
+    if ( rc < 0 )
+    {
+        PERROR("xc_alloc_hypercall_buffer: madvise failed");
+        goto out;
+    }
+
     return p;
+
+out:
+    saved_errno = errno;
+    (void)munmap(p, size);
+    errno = saved_errno;
+    return NULL;
 }
 
 static void linux_privcmd_free_hypercall_buffer(xc_interface *xch, xc_osdep_handle h, void *ptr, int npages)
diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./tools/libxl/libxl.c xen-4.4.2/./tools/libxl/libxl.c
--- xen-4.4.1/./tools/libxl/libxl.c 2014-09-02 08:20:19.000000000 +0200
+++ xen-4.4.2/./tools/libxl/libxl.c 2015-03-19 16:06:48.000000000 +0100
@@ -2678,7 +2678,7 @@ void libxl__device_disk_local_initiate_a
         }
 
         if (dev != NULL)
-            dls->diskpath = strdup(dev);
+            dls->diskpath = libxl__strdup(gc, dev);
 
         dls->callback(egc, dls, 0);
         return;
@@ -4551,6 +4551,12 @@ libxl_vcpuinfo *libxl_list_vcpu(libxl_ct
         GC_FREE;
         return NULL;
     }
+
+    if (domaininfo.max_vcpu_id == XEN_INVALID_MAX_VCPU_ID) {
+        GC_FREE;
+        return NULL;
+    }
+
     *nr_cpus_out = libxl_get_max_cpus(ctx);
     ret = ptr = libxl__calloc(NOGC, domaininfo.max_vcpu_id + 1,
                               sizeof(libxl_vcpuinfo));
diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./tools/libxl/libxl_create.c xen-4.4.2/./tools/libxl/libxl_create.c
--- xen-4.4.1/./tools/libxl/libxl_create.c 2014-09-02 08:20:19.000000000 +0200
+++ xen-4.4.2/./tools/libxl/libxl_create.c 2015-03-19 16:06:48.000000000 +0100
@@ -1068,6 +1068,7 @@ static void domcreate_launch_dm(libxl__e
                  "failed give dom%d access to ioports %"PRIx32"-%"PRIx32,
                  domid, io->first, io->first + io->number - 1);
             ret = ERROR_FAIL;
+            goto error_out;
         }
     }
 
@@ -1083,6 +1084,7 @@ static void domcreate_launch_dm(libxl__e
         if (ret < 0) {
             LOGE(ERROR, "failed give dom%d access to irq %d", domid, irq);
             ret = ERROR_FAIL;
+            goto error_out;
         }
     }
 
@@ -1099,6 +1101,7 @@ static void
domcreate_launch_dm(libxl__e "failed give dom%d access to iomem range %"PRIx64"-%"PRIx64, domid, io->start, io->start + io->number - 1); ret = ERROR_FAIL; + goto error_out; } } diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./tools/libxl/libxl_internal.c xen-4.4.2/./tools/libxl/libxl_internal.c --- xen-4.4.1/./tools/libxl/libxl_internal.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./tools/libxl/libxl_internal.c 2015-03-19 16:06:48.000000000 +0100 @@ -275,10 +275,12 @@ _hidden int libxl__parse_mac(const char char *endptr; int i; - for (i = 0, tok = s; *tok && (i < 6); ++i, tok += 3) { + for (i = 0, tok = s; *tok && (i < 6); ++i, tok = endptr) { mac[i] = strtol(tok, &endptr, 16); if (endptr != (tok + 2) || (*endptr != '\0' && *endptr != ':') ) return ERROR_INVAL; + if (*endptr == ':') + endptr++; } if ( i != 6 ) return ERROR_INVAL; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./tools/libxl/xl_cmdimpl.c xen-4.4.2/./tools/libxl/xl_cmdimpl.c --- xen-4.4.1/./tools/libxl/xl_cmdimpl.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./tools/libxl/xl_cmdimpl.c 2015-03-19 16:06:48.000000000 +0100 @@ -4580,10 +4580,8 @@ static void print_domain_vcpuinfo(uint32 vcpuinfo = libxl_list_vcpu(ctx, domid, &nb_vcpu, &nrcpus); - if (!vcpuinfo) { - fprintf(stderr, "libxl_list_vcpu failed.\n"); + if (!vcpuinfo) return; - } for (i = 0; i < nb_vcpu; i++) { print_vcpuinfo(domid, &vcpuinfo[i], nr_cpus); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./tools/pygrub/src/GrubConf.py xen-4.4.2/./tools/pygrub/src/GrubConf.py --- xen-4.4.1/./tools/pygrub/src/GrubConf.py 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./tools/pygrub/src/GrubConf.py 2015-03-19 16:06:48.000000000 +0100 @@ -231,7 +231,7 @@ class _GrubConfigFile(object): if val == "saved": self._default = 0 else: - self._default = int(val) + self._default = val if self._default < 0: raise ValueError, "default must be positive number" @@ -431,11 +431,11 @@ class Grub2ConfigFile(_GrubConfigFile): if self.commands.has_key(com): if self.commands[com] is not None: - if arg.strip() == "${saved_entry}": + arg_strip = arg.strip() + if arg_strip == "${saved_entry}" or arg_strip == "${next_entry}": + logging.warning("grub2's saved_entry/next_entry not supported") arg = "0" - elif arg.strip() == "${next_entry}": - arg = "0" - setattr(self, self.commands[com], arg.strip()) + setattr(self, self.commands[com], arg_strip) else: logging.info("Ignored directive %s" %(com,)) elif com.startswith('set:'): diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./tools/pygrub/src/pygrub xen-4.4.2/./tools/pygrub/src/pygrub --- xen-4.4.1/./tools/pygrub/src/pygrub 2015-07-20 11:32:57.000000000 +0200 +++ xen-4.4.2/./tools/pygrub/src/pygrub 2015-07-20 11:39:03.311092161 +0200 @@ -432,7 +432,9 @@ class Grub: map(lambda x: (x,grub.ExtLinuxConf.ExtLinuxConfigFile), ["/boot/isolinux/isolinux.cfg", "/boot/extlinux/extlinux.conf", - "/boot/extlinux.conf"]) + \ + "/boot/extlinux.conf", + "/extlinux/extlinux.conf", + "/extlinux.conf"]) + \ map(lambda x: (x,grub.GrubConf.GrubConfigFile), ["/boot/grub/menu.lst", "/boot/grub/grub.conf", "/grub/menu.lst", "/grub/grub.conf"]) @@ -459,13 +461,39 @@ class Grub: del f self.cf.parse(buf) + def image_index(self): + if isinstance(self.cf.default, int): + sel = self.cf.default + elif self.cf.default.isdigit(): + sel = int(self.cf.default) + else: + # We don't fully support submenus. Look for the leaf value in + # "submenu0>submenu1>...>menuentry" and hope that it's unique. 
+ title = self.cf.default + while 1: + try: + title = re.search('(\S)>(\S.+$)',title).group(2) + except AttributeError: + break + + # Map string to index in images array + sel = 0 + for i in range(len(self.cf.images)): + if self.cf.images[i].title == title: + sel = i + break + + # If the selected (default) image doesn't exist we select the first entry + if sel > len(self.cf.images): + logging.warning("Default image not found") + sel = 0 + + return sel + def run(self): timeout = int(self.cf.timeout) + self.selected_image = self.image_index() - self.selected_image = self.cf.default - # If the selected (default) image doesn't exist we select the first entry - if self.selected_image > len(self.cf.images): - self.selected_image = 0 self.isdone = False while not self.isdone: self.run_main(timeout) @@ -610,7 +638,7 @@ def run_grub(file, entry, fs, cfg_args): if interactive and not list_entries: curses.wrapper(run_main) else: - sel = g.cf.default + sel = g.image_index() # set the entry to boot as requested if entry is not None: diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/arm/p2m.c xen-4.4.2/./xen/arch/arm/p2m.c --- xen-4.4.1/./xen/arch/arm/p2m.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/arm/p2m.c 2015-03-19 16:06:48.000000000 +0100 @@ -688,13 +688,29 @@ struct page_info *get_page_from_gva(stru { struct p2m_domain *p2m = &d->arch.p2m; struct page_info *page = NULL; - paddr_t maddr; - - ASSERT(d == current->domain); + paddr_t maddr = 0; + int rc; spin_lock(&p2m->lock); - if ( gvirt_to_maddr(va, &maddr, flags) ) + if ( unlikely(d != current->domain) ) + { + unsigned long irq_flags; + + local_irq_save(irq_flags); + p2m_load_VTTBR(d); + + rc = gvirt_to_maddr(va, &maddr, flags); + + p2m_load_VTTBR(current->domain); + local_irq_restore(irq_flags); + } + else + { + rc = gvirt_to_maddr(va, &maddr, flags); + } + + if ( rc ) goto err; if ( !mfn_valid(maddr >> PAGE_SHIFT) ) diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/arm/time.c xen-4.4.2/./xen/arch/arm/time.c --- xen-4.4.1/./xen/arch/arm/time.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/arm/time.c 2015-03-19 16:06:48.000000000 +0100 @@ -213,6 +213,19 @@ static void timer_interrupt(int irq, voi static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs) { + /* + * Edge-triggered interrupts can be used for the virtual timer. Even + * if the timer output signal is masked in the context switch, the + * GIC will keep track that of any interrupts raised while IRQS are + * disabled. As soon as IRQs are re-enabled, the virtual interrupt + * will be injected to Xen. + * + * If an IDLE vCPU was scheduled next then we should ignore the + * interrupt. 
+ */ + if ( unlikely(is_idle_vcpu(current)) ) + return; + current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0); WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, CNTV_CTL_EL0); vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq, 1); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/arm/traps.c xen-4.4.2/./xen/arch/arm/traps.c --- xen-4.4.1/./xen/arch/arm/traps.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/arm/traps.c 2015-03-19 16:06:48.000000000 +0100 @@ -888,7 +888,7 @@ static void show_guest_stack(struct vcpu return; } - page = get_page_from_gva(current->domain, sp, GV2M_READ); + page = get_page_from_gva(v->domain, sp, GV2M_READ); if ( page == NULL ) { printk("Failed to convert stack to physical address\n"); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/arm/vgic.c xen-4.4.2/./xen/arch/arm/vgic.c --- xen-4.4.1/./xen/arch/arm/vgic.c 2015-07-20 11:32:57.000000000 +0200 +++ xen-4.4.2/./xen/arch/arm/vgic.c 2015-03-19 16:06:48.000000000 +0100 @@ -95,10 +95,7 @@ int domain_vgic_init(struct domain *d) d->arch.vgic.pending_irqs = xzalloc_array(struct pending_irq, d->arch.vgic.nr_lines); if ( d->arch.vgic.pending_irqs == NULL ) - { - xfree(d->arch.vgic.shared_irqs); return -ENOMEM; - } for (i=0; iarch.vgic.nr_lines; i++) { @@ -509,8 +506,8 @@ static int vgic_distr_mmio_write(struct vgic_lock_rank(v, rank); tr = rank->ienable; rank->ienable |= *r; - vgic_unlock_rank(v, rank); vgic_enable_irqs(v, (*r) & (~tr), gicd_reg - GICD_ISENABLER); + vgic_unlock_rank(v, rank); return 1; case GICD_ICENABLER ... GICD_ICENABLERN: @@ -520,8 +517,8 @@ static int vgic_distr_mmio_write(struct vgic_lock_rank(v, rank); tr = rank->ienable; rank->ienable &= ~*r; - vgic_unlock_rank(v, rank); vgic_disable_irqs(v, (*r) & tr, gicd_reg - GICD_ICENABLER); + vgic_unlock_rank(v, rank); return 1; case GICD_ISPENDR ... GICD_ISPENDRN: diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/acpi/cpu_idle.c xen-4.4.2/./xen/arch/x86/acpi/cpu_idle.c --- xen-4.4.1/./xen/arch/x86/acpi/cpu_idle.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/acpi/cpu_idle.c 2015-03-19 16:06:48.000000000 +0100 @@ -300,7 +300,11 @@ void mwait_idle_with_hints(unsigned int s_time_t expires = per_cpu(timer_deadline, cpu); if ( boot_cpu_has(X86_FEATURE_CLFLUSH_MONITOR) ) + { + mb(); clflush((void *)&mwait_wakeup(cpu)); + mb(); + } __monitor((void *)&mwait_wakeup(cpu), 0, 0); smp_mb(); @@ -950,7 +954,7 @@ static void set_cx( cx->target_residency = cx->latency * latency_factor; smp_wmb(); - acpi_power->count++; + acpi_power->count += (cx->type != ACPI_STATE_C1); if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 ) acpi_power->safe_state = cx; } diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/crash.c xen-4.4.2/./xen/arch/x86/crash.c --- xen-4.4.1/./xen/arch/x86/crash.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/crash.c 2015-03-19 16:06:48.000000000 +0100 @@ -38,7 +38,7 @@ static DEFINE_PER_CPU_READ_MOSTLY(bool_t /* This becomes the NMI handler for non-crashing CPUs, when Xen is crashing. */ void __attribute__((noreturn)) do_nmi_crash(struct cpu_user_regs *regs) { - int cpu = smp_processor_id(); + unsigned int cpu = smp_processor_id(); /* nmi_shootdown_cpus() should ensure that this assertion is correct. 
*/ ASSERT(cpu != crashing_cpu); @@ -116,7 +116,7 @@ void __attribute__((noreturn)) do_nmi_cr static void nmi_shootdown_cpus(void) { unsigned long msecs; - int i, cpu = smp_processor_id(); + unsigned int cpu = smp_processor_id(); disable_lapic_nmi_watchdog(); local_irq_disable(); @@ -126,37 +126,25 @@ static void nmi_shootdown_cpus(void) cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu)); - /* Change NMI trap handlers. Non-crashing pcpus get nmi_crash which - * invokes do_nmi_crash (above), which cause them to write state and - * fall into a loop. The crashing pcpu gets the nop handler to - * cause it to return to this function ASAP. + /* + * Disable IST for MCEs to avoid stack corruption race conditions, and + * change the NMI handler to a nop to avoid deviation from this codepath. */ - for ( i = 0; i < nr_cpu_ids; i++ ) - { - if ( idt_tables[i] == NULL ) - continue; + _set_gate_lower(&idt_tables[cpu][TRAP_nmi], 14, 0, &trap_nop); + set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); - if ( i == cpu ) - { - /* - * Disable the interrupt stack tables for this cpu's MCE and NMI - * handlers, and alter the NMI handler to have no operation. - * Disabling the stack tables prevents stack corruption race - * conditions, while changing the handler helps prevent cascading - * faults; we are certainly going to crash by this point. - * - * This update is safe from a security point of view, as this pcpu - * is never going to try to sysret back to a PV vcpu. - */ - _set_gate_lower(&idt_tables[i][TRAP_nmi], 14, 0, &trap_nop); - set_ist(&idt_tables[i][TRAP_machine_check], IST_NONE); - } - else - { - /* Do not update stack table for other pcpus. */ - _update_gate_addr_lower(&idt_tables[i][TRAP_nmi], &nmi_crash); - } - } + /* + * Ideally would be: + * exception_table[TRAP_nmi] = &do_nmi_crash; + * + * but the exception_table is read only. Borrow an unused fixmap entry + * to construct a writable mapping. + */ + set_fixmap(FIX_TBOOT_MAP_ADDRESS, __pa(&exception_table[TRAP_nmi])); + write_atomic((unsigned long *) + (fix_to_virt(FIX_TBOOT_MAP_ADDRESS) + + ((unsigned long)&exception_table[TRAP_nmi] & ~PAGE_MASK)), + (unsigned long)&do_nmi_crash); /* Ensure the new callback function is set before sending out the NMI. */ wmb(); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/domain_page.c xen-4.4.2/./xen/arch/x86/domain_page.c --- xen-4.4.1/./xen/arch/x86/domain_page.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/domain_page.c 2015-03-19 16:06:48.000000000 +0100 @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -37,11 +38,14 @@ static inline struct vcpu *mapcache_curr */ if ( unlikely(pagetable_is_null(v->arch.guest_table)) && is_pv_vcpu(v) ) { + unsigned long cr3; + /* If we really are idling, perform lazy context switch now. */ if ( (v = idle_vcpu[smp_processor_id()]) == current ) sync_local_execstate(); /* We must now be running on the idle page table. 
*/ - ASSERT(read_cr3() == __pa(idle_pg_table)); + ASSERT((cr3 = read_cr3()) == __pa(idle_pg_table) || + (efi_enabled && cr3 == efi_rs_page_table())); } return v; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/efi/boot.c xen-4.4.2/./xen/arch/x86/efi/boot.c --- xen-4.4.1/./xen/arch/x86/efi/boot.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/efi/boot.c 2015-03-19 16:06:48.000000000 +0100 @@ -594,11 +594,12 @@ static void __init setup_efi_pci(void) struct efi_pci_rom *last = NULL; status = efi_bs->LocateHandle(ByProtocol, &pci_guid, NULL, &size, NULL); - if ( status == EFI_BUFFER_TOO_SMALL ) - status = efi_bs->AllocatePool(EfiLoaderData, size, (void **)&handles); - if ( !EFI_ERROR(status) ) - status = efi_bs->LocateHandle(ByProtocol, &pci_guid, NULL, &size, - handles); + if ( status != EFI_BUFFER_TOO_SMALL ) + return; + status = efi_bs->AllocatePool(EfiLoaderData, size, (void **)&handles); + if ( EFI_ERROR(status) ) + return; + status = efi_bs->LocateHandle(ByProtocol, &pci_guid, NULL, &size, handles); if ( EFI_ERROR(status) ) size = 0; @@ -781,9 +782,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info; EFI_FILE_HANDLE dir_handle; union string section = { NULL }, name; - struct e820entry *e; u64 efer; - bool_t base_video = 0; + bool_t base_video = 0, retry; efi_ih = ImageHandle; efi_bs = SystemTable->BootServices; @@ -1386,68 +1386,78 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY if ( mbi.mem_upper < xen_phys_start ) blexit(L"Out of static memory"); efi_memmap = (void *)(long)mbi.mem_upper; - status = efi_bs->GetMemoryMap(&efi_memmap_size, efi_memmap, &map_key, - &efi_mdesc_size, &mdesc_ver); - if ( EFI_ERROR(status) ) - blexit(L"Cannot obtain memory map"); - - /* Populate E820 table and check trampoline area availability. */ - e = e820map - 1; - for ( i = 0; i < efi_memmap_size; i += efi_mdesc_size ) + for ( retry = 0; ; retry = 1 ) { - EFI_MEMORY_DESCRIPTOR *desc = efi_memmap + i; - u64 len = desc->NumberOfPages << EFI_PAGE_SHIFT; - u32 type; + struct e820entry *e; - switch ( desc->Type ) + status = efi_bs->GetMemoryMap(&efi_memmap_size, efi_memmap, &map_key, + &efi_mdesc_size, &mdesc_ver); + if ( EFI_ERROR(status) ) + PrintErrMesg(L"Cannot obtain memory map", status); + + /* Populate E820 table and check trampoline area availability. 
*/ + e = e820map - 1; + for ( e820nr = i = 0; i < efi_memmap_size; i += efi_mdesc_size ) { - default: - type = E820_RESERVED; - break; - case EfiConventionalMemory: - case EfiBootServicesCode: - case EfiBootServicesData: - if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 && - len >= cfg.size && desc->PhysicalStart + len > cfg.addr ) - cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK; - /* fall through */ - case EfiLoaderCode: - case EfiLoaderData: - if ( desc->Attribute & EFI_MEMORY_WB ) - type = E820_RAM; + EFI_MEMORY_DESCRIPTOR *desc = efi_memmap + i; + u64 len = desc->NumberOfPages << EFI_PAGE_SHIFT; + u32 type; + + switch ( desc->Type ) + { + default: + type = E820_RESERVED; + break; + case EfiConventionalMemory: + case EfiBootServicesCode: + case EfiBootServicesData: + if ( !trampoline_phys && + desc->PhysicalStart + len <= 0x100000 && + len >= cfg.size && + desc->PhysicalStart + len > cfg.addr ) + cfg.addr = (desc->PhysicalStart + len - cfg.size) & + PAGE_MASK; + /* fall through */ + case EfiLoaderCode: + case EfiLoaderData: + if ( desc->Attribute & EFI_MEMORY_WB ) + type = E820_RAM; + else + case EfiUnusableMemory: + type = E820_UNUSABLE; + break; + case EfiACPIReclaimMemory: + type = E820_ACPI; + break; + case EfiACPIMemoryNVS: + type = E820_NVS; + break; + } + if ( e820nr && type == e->type && + desc->PhysicalStart == e->addr + e->size ) + e->size += len; + else if ( !len || e820nr >= E820MAX ) + continue; else - case EfiUnusableMemory: - type = E820_UNUSABLE; - break; - case EfiACPIReclaimMemory: - type = E820_ACPI; - break; - case EfiACPIMemoryNVS: - type = E820_NVS; - break; + { + ++e; + e->addr = desc->PhysicalStart; + e->size = len; + e->type = type; + ++e820nr; + } } - if ( e820nr && type == e->type && - desc->PhysicalStart == e->addr + e->size ) - e->size += len; - else if ( !len || e820nr >= E820MAX ) - continue; - else + if ( !trampoline_phys ) { - ++e; - e->addr = desc->PhysicalStart; - e->size = len; - e->type = type; - ++e820nr; + if ( !cfg.addr ) + blexit(L"No memory for trampoline"); + relocate_trampoline(cfg.addr); } - } - if ( !trampoline_phys ) - { - if ( !cfg.addr ) - blexit(L"No memory for trampoline"); - relocate_trampoline(cfg.addr); - } - status = efi_bs->ExitBootServices(ImageHandle, map_key); + status = efi_bs->ExitBootServices(ImageHandle, map_key); + if ( status != EFI_INVALID_PARAMETER || retry ) + break; + } if ( EFI_ERROR(status) ) PrintErrMesg(L"Cannot exit boot services", status); @@ -1496,6 +1506,10 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY for( ; ; ); /* not reached */ } + +static bool_t __initdata efi_rs_enable = 1; +boolean_param("efi-rs", efi_rs_enable); + #ifndef USE_SET_VIRTUAL_ADDRESS_MAP static __init void copy_mapping(unsigned long mfn, unsigned long end, bool_t (*is_valid)(unsigned long smfn, @@ -1569,7 +1583,7 @@ void __init efi_init_memory(void) desc->PhysicalStart, desc->PhysicalStart + len - 1, desc->Type, desc->Attribute); - if ( !(desc->Attribute & EFI_MEMORY_RUNTIME) ) + if ( !efi_rs_enable || !(desc->Attribute & EFI_MEMORY_RUNTIME) ) continue; desc->VirtualStart = INVALID_VIRTUAL_ADDRESS; @@ -1633,6 +1647,12 @@ void __init efi_init_memory(void) } } + if ( !efi_rs_enable ) + { + efi_fw_vendor = NULL; + return; + } + #ifdef USE_SET_VIRTUAL_ADDRESS_MAP efi_rs->SetVirtualAddressMap(efi_memmap_size, efi_mdesc_size, mdesc_ver, efi_memmap); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/efi/runtime.c xen-4.4.2/./xen/arch/x86/efi/runtime.c --- xen-4.4.1/./xen/arch/x86/efi/runtime.c 
2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/efi/runtime.c 2015-03-19 16:06:48.000000000 +0100 @@ -51,6 +51,9 @@ unsigned long efi_rs_enter(void) static const u32 mxcsr = MXCSR_DEFAULT; unsigned long cr3 = read_cr3(); + if ( !efi_l4_pgtable ) + return 0; + save_fpu_enable(); asm volatile ( "fldcw %0" :: "m" (fcw) ); asm volatile ( "ldmxcsr %0" :: "m" (mxcsr) ); @@ -78,6 +81,8 @@ unsigned long efi_rs_enter(void) void efi_rs_leave(unsigned long cr3) { + if ( !cr3 ) + return; write_cr3(cr3); if ( is_pv_vcpu(current) && !is_idle_vcpu(current) ) { @@ -93,12 +98,19 @@ void efi_rs_leave(unsigned long cr3) stts(); } +paddr_t efi_rs_page_table(void) +{ + return efi_l4_pgtable ? virt_to_maddr(efi_l4_pgtable) : 0; +} + unsigned long efi_get_time(void) { EFI_TIME time; EFI_STATUS status; unsigned long cr3 = efi_rs_enter(), flags; + if ( !cr3 ) + return 0; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->GetTime(&time, NULL); spin_unlock_irqrestore(&rtc_lock, flags); @@ -116,6 +128,8 @@ void efi_halt_system(void) EFI_STATUS status; unsigned long cr3 = efi_rs_enter(); + if ( !cr3 ) + return; status = efi_rs->ResetSystem(EfiResetShutdown, EFI_SUCCESS, 0, NULL); efi_rs_leave(cr3); @@ -127,6 +141,8 @@ void efi_reset_system(bool_t warm) EFI_STATUS status; unsigned long cr3 = efi_rs_enter(); + if ( !cr3 ) + return; status = efi_rs->ResetSystem(warm ? EfiResetWarm : EfiResetCold, EFI_SUCCESS, 0, NULL); efi_rs_leave(cr3); @@ -149,6 +165,8 @@ int efi_get_info(uint32_t idx, union xen { unsigned long cr3 = efi_rs_enter(); + if ( !cr3 ) + return -EOPNOTSUPP; info->version = efi_rs->Hdr.Revision; efi_rs_leave(cr3); break; @@ -158,6 +176,8 @@ int efi_get_info(uint32_t idx, union xen info->cfg.nent = efi_num_ct; break; case XEN_FW_EFI_VENDOR: + if ( !efi_fw_vendor ) + return -EOPNOTSUPP; info->vendor.revision = efi_fw_revision; n = info->vendor.bufsz / sizeof(*efi_fw_vendor); if ( !guest_handle_okay(guest_handle_cast(info->vendor.name, @@ -280,6 +300,8 @@ int efi_runtime_call(struct xenpf_efi_ru return -EINVAL; cr3 = efi_rs_enter(); + if ( !cr3 ) + return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->GetTime(cast_time(&op->u.get_time.time), &caps); spin_unlock_irqrestore(&rtc_lock, flags); @@ -300,6 +322,8 @@ int efi_runtime_call(struct xenpf_efi_ru return -EINVAL; cr3 = efi_rs_enter(); + if ( !cr3 ) + return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->SetTime(cast_time(&op->u.set_time)); spin_unlock_irqrestore(&rtc_lock, flags); @@ -314,6 +338,8 @@ int efi_runtime_call(struct xenpf_efi_ru return -EINVAL; cr3 = efi_rs_enter(); + if ( !cr3 ) + return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->GetWakeupTime(&enabled, &pending, cast_time(&op->u.get_wakeup_time)); @@ -336,6 +362,8 @@ int efi_runtime_call(struct xenpf_efi_ru return -EINVAL; cr3 = efi_rs_enter(); + if ( !cr3 ) + return -EOPNOTSUPP; spin_lock_irqsave(&rtc_lock, flags); status = efi_rs->SetWakeupTime(!!(op->misc & XEN_EFI_SET_WAKEUP_TIME_ENABLE), @@ -354,7 +382,10 @@ int efi_runtime_call(struct xenpf_efi_ru return -EINVAL; cr3 = efi_rs_enter(); - status = efi_rs->GetNextHighMonotonicCount(&op->misc); + if ( cr3 ) + status = efi_rs->GetNextHighMonotonicCount(&op->misc); + else + rc = -EOPNOTSUPP; efi_rs_leave(cr3); break; @@ -390,15 +421,20 @@ int efi_runtime_call(struct xenpf_efi_ru data = NULL; cr3 = efi_rs_enter(); - status = efi_rs->GetVariable( - name, cast_guid(&op->u.get_variable.vendor_guid), - &op->misc, &size, data); - efi_rs_leave(cr3); + if ( cr3 ) + { + 
status = efi_rs->GetVariable( + name, cast_guid(&op->u.get_variable.vendor_guid), + &op->misc, &size, data); + efi_rs_leave(cr3); - if ( !EFI_ERROR(status) && - copy_to_guest(op->u.get_variable.data, data, size) ) - rc = -EFAULT; - op->u.get_variable.size = size; + if ( !EFI_ERROR(status) && + copy_to_guest(op->u.get_variable.data, data, size) ) + rc = -EFAULT; + op->u.get_variable.size = size; + } + else + rc = -EOPNOTSUPP; xfree(data); xfree(name); @@ -428,9 +464,12 @@ int efi_runtime_call(struct xenpf_efi_ru else { cr3 = efi_rs_enter(); - status = efi_rs->SetVariable( - name, cast_guid(&op->u.set_variable.vendor_guid), - op->misc, op->u.set_variable.size, data); + if ( cr3 ) + status = efi_rs->SetVariable( + name, cast_guid(&op->u.set_variable.vendor_guid), + op->misc, op->u.set_variable.size, data); + else + rc = -EOPNOTSUPP; efi_rs_leave(cr3); } @@ -462,15 +501,21 @@ int efi_runtime_call(struct xenpf_efi_ru } cr3 = efi_rs_enter(); - status = efi_rs->GetNextVariableName( - &size, name.str, - cast_guid(&op->u.get_next_variable_name.vendor_guid)); - efi_rs_leave(cr3); + if ( cr3 ) + { + status = efi_rs->GetNextVariableName( + &size, name.str, + cast_guid(&op->u.get_next_variable_name.vendor_guid)); + efi_rs_leave(cr3); - if ( !EFI_ERROR(status) && - copy_to_guest(op->u.get_next_variable_name.name, name.raw, size) ) - rc = -EFAULT; - op->u.get_next_variable_name.size = size; + if ( !EFI_ERROR(status) && + copy_to_guest(op->u.get_next_variable_name.name, + name.raw, size) ) + rc = -EFAULT; + op->u.get_next_variable_name.size = size; + } + else + rc = -EOPNOTSUPP; xfree(name.raw); } @@ -507,7 +552,7 @@ int efi_runtime_call(struct xenpf_efi_ru } cr3 = efi_rs_enter(); - if ( (efi_rs->Hdr.Revision >> 16) < 2 ) + if ( !cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) { efi_rs_leave(cr3); return -EOPNOTSUPP; @@ -526,7 +571,7 @@ int efi_runtime_call(struct xenpf_efi_ru return -EINVAL; cr3 = efi_rs_enter(); - if ( (efi_rs->Hdr.Revision >> 16) < 2 ) + if ( !cr3 || (efi_rs->Hdr.Revision >> 16) < 2 ) { efi_rs_leave(cr3); return -EOPNOTSUPP; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/efi/stub.c xen-4.4.2/./xen/arch/x86/efi/stub.c --- xen-4.4.1/./xen/arch/x86/efi/stub.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/efi/stub.c 2015-03-19 16:06:48.000000000 +0100 @@ -9,6 +9,12 @@ const bool_t efi_enabled = 0; void __init efi_init_memory(void) { } +paddr_t efi_rs_page_table(void) +{ + BUG(); + return 0; +} + unsigned long efi_get_time(void) { BUG(); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/hvm/hvm.c xen-4.4.2/./xen/arch/x86/hvm/hvm.c --- xen-4.4.1/./xen/arch/x86/hvm/hvm.c 2015-07-20 11:32:57.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/hvm/hvm.c 2015-03-19 16:06:48.000000000 +0100 @@ -1041,6 +1041,7 @@ static int hvm_load_cpu_xsave_states(str struct vcpu *v; struct hvm_hw_cpu_xsave *ctxt; struct hvm_save_descriptor *desc; + unsigned int i, desc_start; /* Which vcpu is this? 
*/ vcpuid = hvm_load_instance(h); @@ -1081,15 +1082,8 @@ static int hvm_load_cpu_xsave_states(str save_area) + XSTATE_AREA_MIN_SIZE); return -EINVAL; } - size = HVM_CPU_XSAVE_SIZE(xfeature_mask); - if ( desc->length > size ) - { - printk(XENLOG_G_WARNING - "HVM%d.%d restore mismatch: xsave length %u > %u\n", - d->domain_id, vcpuid, desc->length, size); - return -EOPNOTSUPP; - } h->cur += sizeof (*desc); + desc_start = h->cur; ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur]; h->cur += desc->length; @@ -1109,10 +1103,24 @@ static int hvm_load_cpu_xsave_states(str size = HVM_CPU_XSAVE_SIZE(ctxt->xcr0_accum); if ( desc->length > size ) { + /* + * Xen 4.3.0, 4.2.3 and older used to send longer-than-needed + * xsave regions. Permit loading the record if the extra data + * is all zero. + */ + for ( i = size; i < desc->length; i++ ) + { + if ( h->data[desc_start + i] ) + { + printk(XENLOG_G_WARNING + "HVM%d.%u restore mismatch: xsave length %#x > %#x (non-zero data at %#x)\n", + d->domain_id, vcpuid, desc->length, size, i); + return -EOPNOTSUPP; + } + } printk(XENLOG_G_WARNING - "HVM%d.%d restore mismatch: xsave length %u > %u\n", + "HVM%d.%u restore mismatch: xsave length %#x > %#x\n", d->domain_id, vcpuid, desc->length, size); - return -EOPNOTSUPP; } /* Checking finished */ @@ -1121,7 +1129,8 @@ static int hvm_load_cpu_xsave_states(str if ( ctxt->xcr0_accum & XSTATE_NONLAZY ) v->arch.nonlazy_xstate_used = 1; memcpy(v->arch.xsave_area, &ctxt->save_area, - desc->length - offsetof(struct hvm_hw_cpu_xsave, save_area)); + min(desc->length, size) - offsetof(struct hvm_hw_cpu_xsave, + save_area)); return 0; } @@ -3537,10 +3546,10 @@ int hvm_do_hypercall(struct cpu_user_reg regs->r8, regs->r9); curr->arch.hvm_vcpu.hcall_64bit = 0; } + else if ( unlikely(is_pvh_vcpu(curr)) ) + regs->_eax = -ENOSYS; /* PVH 32bitfixme. */ else { - ASSERT(!is_pvh_vcpu(curr)); /* PVH 32bitfixme. */ - HVM_DBG_LOG(DBG_LEVEL_HCALL, "hcall%u(%x, %x, %x, %x, %x, %x)", eax, (uint32_t)regs->ebx, (uint32_t)regs->ecx, (uint32_t)regs->edx, (uint32_t)regs->esi, diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/hvm/svm/svm.c xen-4.4.2/./xen/arch/x86/hvm/svm/svm.c --- xen-4.4.1/./xen/arch/x86/hvm/svm/svm.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/hvm/svm/svm.c 2015-03-19 16:06:48.000000000 +0100 @@ -90,6 +90,15 @@ static bool_t amd_erratum383_found __rea static uint64_t osvw_length, osvw_status; static DEFINE_SPINLOCK(osvw_lock); +/* Only crash the guest if the problem originates in kernel mode. 
*/ +static void svm_crash_or_fault(struct vcpu *v) +{ + if ( vmcb_get_cpl(v->arch.hvm_svm.vmcb) ) + hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE); + else + domain_crash(v->domain); +} + void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len) { struct vcpu *curr = current; @@ -100,7 +109,7 @@ void __update_guest_eip(struct cpu_user_ if ( unlikely(inst_len > 15) ) { gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len); - domain_crash(curr->domain); + svm_crash_or_fault(curr); return; } @@ -2152,8 +2161,8 @@ void svm_vmexit_handler(struct cpu_user_ goto out; case NESTEDHVM_VMEXIT_FATALERROR: gdprintk(XENLOG_ERR, "unexpected nestedsvm_vmexit() error\n"); - goto exit_and_crash; - + domain_crash(v->domain); + goto out; default: BUG(); case NESTEDHVM_VMEXIT_ERROR: @@ -2166,18 +2175,22 @@ void svm_vmexit_handler(struct cpu_user_ case NESTEDHVM_VMEXIT_FATALERROR: gdprintk(XENLOG_ERR, "unexpected nestedsvm_check_intercepts() error\n"); - goto exit_and_crash; + domain_crash(v->domain); + goto out; default: gdprintk(XENLOG_INFO, "nestedsvm_check_intercepts() returned %i\n", nsret); - goto exit_and_crash; + domain_crash(v->domain); + goto out; } } if ( unlikely(exit_reason == VMEXIT_INVALID) ) { + gdprintk(XENLOG_ERR, "invalid VMCB state:\n"); svm_vmcb_dump(__func__, vmcb); - goto exit_and_crash; + domain_crash(v->domain); + goto out; } perfc_incra(svmexits, exit_reason); @@ -2212,13 +2225,13 @@ void svm_vmexit_handler(struct cpu_user_ case VMEXIT_EXCEPTION_DB: if ( !v->domain->debugger_attached ) - goto exit_and_crash; + goto unexpected_exit_type; domain_pause_for_debugger(); break; case VMEXIT_EXCEPTION_BP: if ( !v->domain->debugger_attached ) - goto exit_and_crash; + goto unexpected_exit_type; /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. 
*/ if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 ) break; @@ -2455,12 +2468,12 @@ void svm_vmexit_handler(struct cpu_user_ break; default: - exit_and_crash: + unexpected_exit_type: gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = %#"PRIx64", " "exitinfo1 = %#"PRIx64", exitinfo2 = %#"PRIx64"\n", exit_reason, (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2); - domain_crash(v->domain); + svm_crash_or_fault(v); break; } diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/hvm/svm/vpmu.c xen-4.4.2/./xen/arch/x86/hvm/svm/vpmu.c --- xen-4.4.1/./xen/arch/x86/hvm/svm/vpmu.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/hvm/svm/vpmu.c 2015-03-19 16:06:48.000000000 +0100 @@ -398,9 +398,6 @@ static void amd_vpmu_destroy(struct vcpu { struct vpmu_struct *vpmu = vcpu_vpmu(v); - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - if ( ((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set ) amd_vpmu_unset_msr_bitmap(v); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/hvm/vlapic.c xen-4.4.2/./xen/arch/x86/hvm/vlapic.c --- xen-4.4.1/./xen/arch/x86/hvm/vlapic.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/hvm/vlapic.c 2015-03-19 16:06:48.000000000 +0100 @@ -1161,6 +1161,9 @@ static int lapic_save_regs(struct domain for_each_vcpu ( d, v ) { + if ( hvm_funcs.sync_pir_to_irr ) + hvm_funcs.sync_pir_to_irr(v); + s = vcpu_vlapic(v); if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 ) break; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/hvm/vmx/vmx.c xen-4.4.2/./xen/arch/x86/hvm/vmx/vmx.c --- xen-4.4.1/./xen/arch/x86/hvm/vmx/vmx.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/hvm/vmx/vmx.c 2015-03-19 16:06:48.000000000 +0100 @@ -1530,6 +1530,8 @@ static void vmx_process_isr(int isr, str { unsigned long status; u8 old; + unsigned int i; + const struct vlapic *vlapic = vcpu_vlapic(v); if ( isr < 0 ) isr = 0; @@ -1543,6 +1545,28 @@ static void vmx_process_isr(int isr, str status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET; __vmwrite(GUEST_INTR_STATUS, status); } + + /* + * Theoretically, only level triggered interrupts can have their + * corresponding bits set in the eoi exit bitmap. That is, the bits + * set in the eoi exit bitmap should also be set in TMR. But a periodic + * timer interrupt does not follow the rule: it is edge triggered, but + * requires its corresponding bit be set in the eoi exit bitmap. So we + * should not construct the eoi exit bitmap based on TMR. + * Here we will construct the eoi exit bitmap via (IRR | ISR). This + * means that EOIs to the interrupts that are set in the IRR or ISR will + * cause VM exits after restoring, regardless of the trigger modes. It + * is acceptable because the subsequent interrupts will set up the eoi + * bitmap correctly. 
+ */ + for ( i = 0x10; i < NR_VECTORS; ++i ) + if ( vlapic_test_vector(i, &vlapic->regs->data[APIC_IRR]) || + vlapic_test_vector(i, &vlapic->regs->data[APIC_ISR]) ) + set_bit(i, v->arch.hvm_vmx.eoi_exit_bitmap); + + for ( i = 0; i < ARRAY_SIZE(v->arch.hvm_vmx.eoi_exit_bitmap); ++i ) + __vmwrite(EOI_EXIT_BITMAP(i), v->arch.hvm_vmx.eoi_exit_bitmap[i]); + vmx_vmcs_exit(v); } @@ -2090,6 +2114,7 @@ void vmx_vlapic_msr_changed(struct vcpu { int virtualize_x2apic_mode; struct vlapic *vlapic = vcpu_vlapic(v); + unsigned int msr; virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt || cpu_has_vmx_virtual_intr_delivery) && @@ -2106,8 +2131,6 @@ void vmx_vlapic_msr_changed(struct vcpu if ( !vlapic_hw_disabled(vlapic) && (vlapic_base_address(vlapic) == APIC_DEFAULT_PHYS_BASE) ) { - unsigned int msr; - if ( virtualize_x2apic_mode && vlapic_x2apic_mode(vlapic) ) { v->arch.hvm_vmx.secondary_exec_control |= @@ -2136,15 +2159,15 @@ void vmx_vlapic_msr_changed(struct vcpu } } else - { v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - for ( msr = MSR_IA32_APICBASE_MSR; - msr <= MSR_IA32_APICBASE_MSR + 0xff; msr++ ) - vmx_enable_intercept_for_msr(v, msr, - MSR_TYPE_R | MSR_TYPE_W); - } } + if ( !(v->arch.hvm_vmx.secondary_exec_control & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE) ) + for ( msr = MSR_IA32_APICBASE_MSR; + msr <= MSR_IA32_APICBASE_MSR + 0xff; msr++ ) + vmx_enable_intercept_for_msr(v, msr, MSR_TYPE_R | MSR_TYPE_W); + vmx_update_secondary_exec_control(v); vmx_vmcs_exit(v); } @@ -2161,17 +2184,19 @@ static int vmx_msr_write_intercept(unsig __vmwrite(GUEST_SYSENTER_CS, msr_content); break; case MSR_IA32_SYSENTER_ESP: + if ( !is_canonical_address(msr_content) ) + goto gp_fault; __vmwrite(GUEST_SYSENTER_ESP, msr_content); break; case MSR_IA32_SYSENTER_EIP: + if ( !is_canonical_address(msr_content) ) + goto gp_fault; __vmwrite(GUEST_SYSENTER_EIP, msr_content); break; case MSR_IA32_DEBUGCTLMSR: { int i, rc = 0; uint64_t supported = IA32_DEBUGCTLMSR_LBR | IA32_DEBUGCTLMSR_BTF; - if ( !msr_content ) - break; if ( msr_content & ~supported ) { /* Perhaps some other bits are supported in vpmu. 
*/ @@ -2191,12 +2216,10 @@ static int vmx_msr_write_intercept(unsig } if ( (rc < 0) || - (vmx_add_host_load_msr(msr) < 0) ) + (msr_content && (vmx_add_host_load_msr(msr) < 0)) ) hvm_inject_hw_exception(TRAP_machine_check, 0); else - { __vmwrite(GUEST_IA32_DEBUGCTL, msr_content); - } break; } @@ -2540,7 +2563,7 @@ void vmx_vmexit_handler(struct cpu_user_ && ((intr_info & INTR_INFO_INTR_TYPE_MASK) == (X86_EVENTTYPE_NMI << 8)) ) { - do_nmi(regs); + exception_table[TRAP_nmi](regs); enable_nmis(); } break; @@ -3006,8 +3029,19 @@ void vmx_vmexit_handler(struct cpu_user_ /* fall through */ default: exit_and_crash: - gdprintk(XENLOG_ERR, "Bad vmexit (reason %#lx)\n", exit_reason); - domain_crash(v->domain); + { + struct segment_register ss; + + gdprintk(XENLOG_WARNING, "Bad vmexit (reason %#lx)\n", + exit_reason); + + vmx_get_segment_register(v, x86_seg_ss, &ss); + if ( ss.attr.fields.dpl ) + hvm_inject_hw_exception(TRAP_invalid_op, + HVM_DELIVER_NO_ERROR_CODE); + else + domain_crash(v->domain); + } break; } diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/hvm/vmx/vpmu_core2.c xen-4.4.2/./xen/arch/x86/hvm/vmx/vpmu_core2.c --- xen-4.4.1/./xen/arch/x86/hvm/vmx/vpmu_core2.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/hvm/vmx/vpmu_core2.c 2015-03-19 16:06:48.000000000 +0100 @@ -809,8 +809,6 @@ static void core2_vpmu_destroy(struct vc struct vpmu_struct *vpmu = vcpu_vpmu(v); struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context; - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; xfree(core2_vpmu_cxt->pmu_enable); xfree(vpmu->context); if ( cpu_has_vmx_msr_bitmap ) diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/hvm/vpmu.c xen-4.4.2/./xen/arch/x86/hvm/vpmu.c --- xen-4.4.1/./xen/arch/x86/hvm/vpmu.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/hvm/vpmu.c 2015-03-19 16:06:48.000000000 +0100 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -247,10 +248,30 @@ void vpmu_initialise(struct vcpu *v) } } +static void vpmu_clear_last(void *arg) +{ + if ( this_cpu(last_vcpu) == arg ) + this_cpu(last_vcpu) = NULL; +} + void vpmu_destroy(struct vcpu *v) { struct vpmu_struct *vpmu = vcpu_vpmu(v); + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + /* + * Need to clear last_vcpu in case it points to v. + * We can check here non-atomically whether it is 'v' since + * last_vcpu can never become 'v' again at this point. + * We will test it again in vpmu_clear_last() with interrupts + * disabled to make sure we don't clear someone else. + */ + if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v ) + on_selected_cpus(cpumask_of(vpmu->last_pcpu), + vpmu_clear_last, v, 1); + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy ) vpmu->arch_vpmu_ops->arch_vpmu_destroy(v); } @@ -264,3 +285,15 @@ void vpmu_dump(struct vcpu *v) vpmu->arch_vpmu_ops->arch_vpmu_dump(v); } +static int __init vpmu_init(void) +{ + /* NMI watchdog uses LVTPC and HW counter */ + if ( opt_watchdog && opt_vpmu_enabled ) + { + printk(XENLOG_WARNING "NMI watchdog is enabled. 
Turning VPMU off.\n"); + opt_vpmu_enabled = 0; + } + + return 0; +} +__initcall(vpmu_init); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/io_apic.c xen-4.4.2/./xen/arch/x86/io_apic.c --- xen-4.4.1/./xen/arch/x86/io_apic.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/io_apic.c 2015-03-19 16:06:48.000000000 +0100 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1091,7 +1092,7 @@ static inline void UNEXPECTED_IO_APIC(vo { } -static void /*__init*/ __print_IO_APIC(void) +static void /*__init*/ __print_IO_APIC(bool_t boot) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1112,6 +1113,9 @@ static void /*__init*/ __print_IO_APIC(v printk(KERN_INFO "testing the IO APIC.......................\n"); for (apic = 0; apic < nr_ioapics; apic++) { + if ( !boot ) + process_pending_softirqs(); + if (!nr_ioapic_entries[apic]) continue; @@ -1215,6 +1219,10 @@ static void /*__init*/ __print_IO_APIC(v printk(KERN_DEBUG "IRQ to pin mappings:\n"); for (i = 0; i < nr_irqs_gsi; i++) { struct irq_pin_list *entry = irq_2_pin + i; + + if ( !boot && !(i & 0x1f) ) + process_pending_softirqs(); + if (entry->pin < 0) continue; printk(KERN_DEBUG "IRQ%d ", irq_to_desc(i)->arch.vector); @@ -1235,12 +1243,12 @@ static void /*__init*/ __print_IO_APIC(v static void __init print_IO_APIC(void) { if (apic_verbosity != APIC_QUIET) - __print_IO_APIC(); + __print_IO_APIC(1); } static void _print_IO_APIC_keyhandler(unsigned char key) { - __print_IO_APIC(); + __print_IO_APIC(0); } static struct keyhandler print_IO_APIC_keyhandler = { .diagnostic = 1, @@ -2454,6 +2462,9 @@ void dump_ioapic_irq_info(void) for ( irq = 0; irq < nr_irqs_gsi; irq++ ) { + if ( !(irq & 0x1f) ) + process_pending_softirqs(); + entry = &irq_2_pin[irq]; if ( entry->pin == -1 ) continue; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/irq.c xen-4.4.2/./xen/arch/x86/irq.c --- xen-4.4.1/./xen/arch/x86/irq.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/irq.c 2015-03-19 16:06:48.000000000 +0100 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -2231,6 +2232,8 @@ static void dump_irqs(unsigned char key) for ( irq = 0; irq < nr_irqs; irq++ ) { + if ( !(irq & 0x1f) ) + process_pending_softirqs(); desc = irq_to_desc(irq); @@ -2284,6 +2287,7 @@ static void dump_irqs(unsigned char key) xfree(ssid); } + process_pending_softirqs(); printk("Direct vector information:\n"); for ( i = FIRST_DYNAMIC_VECTOR; i < NR_VECTORS; ++i ) if ( direct_apic_vector[i] ) diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/mm.c xen-4.4.2/./xen/arch/x86/mm.c --- xen-4.4.1/./xen/arch/x86/mm.c 2015-07-20 11:32:57.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/mm.c 2015-03-19 16:06:48.000000000 +0100 @@ -121,6 +121,7 @@ #include #include #include +#include #include /* Mapping of the fixmap space needed early. */ @@ -2692,7 +2693,11 @@ int vcpu_destroy_pagetables(struct vcpu v->arch.cr3 = 0; - return rc; + /* + * put_page_and_type_preemptible() is liable to return -EINTR. The + * callers of us expect -ERESTART so convert it over. + */ + return rc != -EINTR ? 
rc : -ERESTART; } int new_guest_cr3(unsigned long mfn) @@ -4497,10 +4502,9 @@ struct memory_map_context struct xen_memory_map map; }; -static int handle_iomem_range(unsigned long s, unsigned long e, void *p) +static int _handle_iomem_range(unsigned long s, unsigned long e, + struct memory_map_context *ctxt) { - struct memory_map_context *ctxt = p; - if ( s > ctxt->s ) { e820entry_t ent; @@ -4523,6 +4527,31 @@ static int handle_iomem_range(unsigned l return 0; } +static int handle_iomem_range(unsigned long s, unsigned long e, void *p) +{ + int err = 0; + + do { + unsigned long low = -1UL; + unsigned int i; + + for ( i = 0; i < nr_ioapics; ++i ) + { + unsigned long mfn = paddr_to_pfn(mp_ioapics[i].mpc_apicaddr); + + if ( mfn >= s && mfn <= e && mfn < low ) + low = mfn; + } + if ( !(low + 1) ) + break; + if ( s < low ) + err = _handle_iomem_range(s, low - 1, p); + s = low + 1; + } while ( !err ); + + return err || s > e ? err : _handle_iomem_range(s, e, p); +} + int xenmem_add_to_physmap_one( struct domain *d, unsigned int space, @@ -4747,7 +4776,7 @@ long arch_memory_op(int op, XEN_GUEST_HA { unsigned long s = PFN_DOWN(e820.map[i].addr); - if ( s ) + if ( s > ctxt.s ) { rc = rangeset_report_ranges(current->domain->iomem_caps, ctxt.s, s - 1, diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/msi.c xen-4.4.2/./xen/arch/x86/msi.c --- xen-4.4.1/./xen/arch/x86/msi.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/msi.c 2015-03-19 16:06:48.000000000 +0100 @@ -496,15 +496,8 @@ int __setup_msi_irq(struct irq_desc *des int msi_free_irq(struct msi_desc *entry) { - unsigned int nr = entry->msi.nvec; - - if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) - { - unsigned long start; - start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1); - msix_put_fixmap(entry->dev->msix, virt_to_fix(start)); - nr = 1; - } + unsigned int nr = entry->msi_attrib.type != PCI_CAP_ID_MSIX + ? entry->msi.nvec : 1; while ( nr-- ) { @@ -515,6 +508,10 @@ int msi_free_irq(struct msi_desc *entry) iommu_update_ire_from_msi(entry + nr, NULL); } + if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) + msix_put_fixmap(entry->dev->msix, + virt_to_fix((unsigned long)entry->mask_base)); + list_del(&entry->list); xfree(entry); return 0; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/nmi.c xen-4.4.2/./xen/arch/x86/nmi.c --- xen-4.4.1/./xen/arch/x86/nmi.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/nmi.c 2015-03-19 16:06:48.000000000 +0100 @@ -43,7 +43,32 @@ static DEFINE_PER_CPU(unsigned int, nmi_ /* opt_watchdog: If true, run a watchdog NMI on each processor. */ bool_t __initdata opt_watchdog = 0; -boolean_param("watchdog", opt_watchdog); + +/* watchdog_force: If true, process unknown NMIs when running the watchdog. */ +bool_t watchdog_force = 0; + +static void __init parse_watchdog(char *s) +{ + if ( !*s ) + { + opt_watchdog = 1; + return; + } + + switch ( parse_bool(s) ) + { + case 0: + opt_watchdog = 0; + return; + case 1: + opt_watchdog = 1; + return; + } + + if ( !strcmp(s, "force") ) + watchdog_force = opt_watchdog = 1; +} +custom_param("watchdog", parse_watchdog); /* opt_watchdog_timeout: Number of seconds to wait before panic. 
*/ static unsigned int opt_watchdog_timeout = 5; @@ -82,6 +107,7 @@ int nmi_active; #define K7_EVNTSEL_USR (1 << 16) #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING +#define K7_EVENT_WIDTH 32 #define P6_EVNTSEL0_ENABLE (1 << 22) #define P6_EVNTSEL_INT (1 << 20) @@ -89,10 +115,12 @@ int nmi_active; #define P6_EVNTSEL_USR (1 << 16) #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 #define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c +#define P6_EVENT_WIDTH 32 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) #define P4_CCCR_OVF_PMI0 (1<<26) #define P4_CCCR_OVF_PMI1 (1<<27) +#define P4_CCCR_OVF (1<<31) #define P4_CCCR_THRESHOLD(N) ((N)<<20) #define P4_CCCR_COMPLEMENT (1<<19) #define P4_CCCR_COMPARE (1<<18) @@ -433,8 +461,10 @@ int __init watchdog_setup(void) return 0; } -void nmi_watchdog_tick(struct cpu_user_regs * regs) +/* Returns false if this was not a watchdog NMI, true otherwise */ +bool_t nmi_watchdog_tick(struct cpu_user_regs *regs) { + bool_t watchdog_tick = 1; unsigned int sum = this_cpu(nmi_timer_ticks); if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() ) @@ -460,8 +490,15 @@ void nmi_watchdog_tick(struct cpu_user_r if ( nmi_perfctr_msr ) { + uint64_t msr_content; + + /* Work out if this is a watchdog tick by checking for overflow. */ if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 ) { + rdmsrl(MSR_P4_IQ_CCCR0, msr_content); + if ( !(msr_content & P4_CCCR_OVF) ) + watchdog_tick = 0; + /* * P4 quirks: * - An overflown perfctr will assert its interrupt @@ -474,14 +511,26 @@ void nmi_watchdog_tick(struct cpu_user_r } else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 ) { + rdmsrl(MSR_P6_PERFCTR0, msr_content); + if ( msr_content & (1ULL << P6_EVENT_WIDTH) ) + watchdog_tick = 0; + /* * Only P6 based Pentium M need to re-unmask the apic vector but * it doesn't hurt other P6 variants. 
*/ apic_write(APIC_LVTPC, APIC_DM_NMI); } + else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 ) + { + rdmsrl(MSR_K7_PERFCTR0, msr_content); + if ( msr_content & (1ULL << K7_EVENT_WIDTH) ) + watchdog_tick = 0; + } write_watchdog_counter(NULL); } + + return watchdog_tick; } /* diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/tboot.c xen-4.4.2/./xen/arch/x86/tboot.c --- xen-4.4.1/./xen/arch/x86/tboot.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/tboot.c 2015-03-19 16:06:48.000000000 +0100 @@ -140,6 +140,7 @@ void __init tboot_probe(void) TXT_PUB_CONFIG_REGS_BASE + TXTCR_SINIT_BASE); tboot_copy_memory((unsigned char *)&sinit_size, sizeof(sinit_size), TXT_PUB_CONFIG_REGS_BASE + TXTCR_SINIT_SIZE); + __set_fixmap(FIX_TBOOT_MAP_ADDRESS, 0, 0); } /* definitions from xen/drivers/passthrough/vtd/iommu.h @@ -477,6 +478,8 @@ int __init tboot_parse_dmar_table(acpi_t dmar_table_raw = xmalloc_array(unsigned char, dmar_table_length); tboot_copy_memory(dmar_table_raw, dmar_table_length, pa); dmar_table = (struct acpi_table_header *)dmar_table_raw; + __set_fixmap(FIX_TBOOT_MAP_ADDRESS, 0, 0); + rc = dmar_handler(dmar_table); xfree(dmar_table_raw); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/traps.c xen-4.4.2/./xen/arch/x86/traps.c --- xen-4.4.1/./xen/arch/x86/traps.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/traps.c 2015-03-19 16:06:48.000000000 +0100 @@ -747,7 +747,7 @@ void pv_cpuid(struct cpu_user_regs *regs switch ( cpuid_leaf ) { - case 0xd: + case XSTATE_CPUID: { unsigned int _eax, _ebx, _ecx, _edx; /* EBX value of main leaf 0 depends on enabled xsave features */ @@ -765,7 +765,7 @@ void pv_cpuid(struct cpu_user_regs *regs b = _eax + _ebx; } } - break; + goto xstate; } } goto out; @@ -787,7 +787,7 @@ void pv_cpuid(struct cpu_user_regs *regs __clear_bit(X86_FEATURE_PSE36, &d); } - switch ( (uint32_t)regs->eax ) + switch ( regs->_eax ) { case 0x00000001: /* Modify Feature Information. */ @@ -822,7 +822,7 @@ void pv_cpuid(struct cpu_user_regs *regs break; case 0x00000007: - if ( regs->ecx == 0 ) + if ( regs->_ecx == 0 ) b &= (cpufeat_mask(X86_FEATURE_BMI1) | cpufeat_mask(X86_FEATURE_HLE) | cpufeat_mask(X86_FEATURE_AVX2) | @@ -835,9 +835,19 @@ void pv_cpuid(struct cpu_user_regs *regs a = c = d = 0; break; - case 0x0000000d: /* XSAVE */ + case XSTATE_CPUID: + xstate: if ( !cpu_has_xsave ) goto unsupported; + if ( regs->_ecx == 1 ) + { + a &= XSTATE_FEATURE_XSAVEOPT | + XSTATE_FEATURE_XSAVEC | + (cpu_has_xgetbv1 ? XSTATE_FEATURE_XGETBV1 : 0) | + (cpu_has_xsaves ? XSTATE_FEATURE_XSAVES : 0); + if ( !cpu_has_xsaves ) + b = c = d = 0; + } break; case 0x80000001: @@ -3226,14 +3236,15 @@ void do_nmi(struct cpu_user_regs *regs) { unsigned int cpu = smp_processor_id(); unsigned char reason; + bool_t handle_unknown = 0; ++nmi_count(cpu); if ( nmi_callback(regs, cpu) ) return; - if ( nmi_watchdog ) - nmi_watchdog_tick(regs); + if ( !nmi_watchdog || (!nmi_watchdog_tick(regs) && watchdog_force) ) + handle_unknown = 1; /* Only the BSP gets external NMIs from the system. 
*/ if ( cpu == 0 ) @@ -3243,7 +3254,7 @@ void do_nmi(struct cpu_user_regs *regs) pci_serr_error(regs); if ( reason & 0x40 ) io_check_error(regs); - if ( !(reason & 0xc0) && !nmi_watchdog ) + if ( !(reason & 0xc0) && handle_unknown ) unknown_nmi_error(regs, reason); } } diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/x86_64/entry.S xen-4.4.2/./xen/arch/x86/x86_64/entry.S --- xen-4.4.1/./xen/arch/x86/x86_64/entry.S 2015-07-20 11:32:57.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/x86_64/entry.S 2015-03-19 16:06:48.000000000 +0100 @@ -658,14 +658,6 @@ handle_ist_exception: je restore_all_guest jmp compat_restore_all_guest -ENTRY(nmi_crash) - pushq $0 - movl $TRAP_nmi,4(%rsp) - SAVE_ALL - movq %rsp,%rdi - callq do_nmi_crash /* Does not return */ - ud2 - ENTRY(machine_check) pushq $0 movl $TRAP_machine_check,4(%rsp) @@ -716,6 +708,7 @@ ENTRY(exception_table) .quad do_alignment_check .quad do_machine_check .quad do_simd_coprocessor_error + .size exception_table, . - exception_table ENTRY(hypercall_table) .quad do_set_trap_table /* 0 */ diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/x86_emulate/x86_emulate.c xen-4.4.2/./xen/arch/x86/x86_emulate/x86_emulate.c --- xen-4.4.1/./xen/arch/x86/x86_emulate/x86_emulate.c 2015-07-20 11:32:57.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/x86_emulate/x86_emulate.c 2015-03-19 16:06:48.000000000 +0100 @@ -4311,7 +4311,9 @@ x86_emulate( case 0xae: /* Grp15 */ switch ( modrm_reg & 7 ) { - case 7: /* clflush */ + case 7: /* clflush{,opt} */ + fail_if(modrm_mod == 3); + fail_if(rep_prefix()); fail_if(ops->wbinvd == NULL); if ( (rc = ops->wbinvd(ctxt)) != 0 ) goto done; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/arch/x86/xstate.c xen-4.4.2/./xen/arch/x86/xstate.c --- xen-4.4.1/./xen/arch/x86/xstate.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/arch/x86/xstate.c 2015-03-19 16:06:48.000000000 +0100 @@ -14,7 +14,10 @@ #include #include -bool_t __read_mostly cpu_has_xsaveopt; +static bool_t __read_mostly cpu_has_xsaveopt; +static bool_t __read_mostly cpu_has_xsavec; +bool_t __read_mostly cpu_has_xgetbv1; +bool_t __read_mostly cpu_has_xsaves; /* * Maximum size (in byte) of the XSAVE/XRSTOR save area required by all @@ -299,12 +302,22 @@ void xstate_init(bool_t bsp) BUG_ON(xsave_cntxt_size != xstate_ctxt_size(feature_mask)); } - /* Check XSAVEOPT feature. */ + /* Check extended XSAVE features. 
*/ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); if ( bsp ) + { cpu_has_xsaveopt = !!(eax & XSTATE_FEATURE_XSAVEOPT); + cpu_has_xsavec = !!(eax & XSTATE_FEATURE_XSAVEC); + /* XXX cpu_has_xgetbv1 = !!(eax & XSTATE_FEATURE_XGETBV1); */ + /* XXX cpu_has_xsaves = !!(eax & XSTATE_FEATURE_XSAVES); */ + } else + { BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT)); + BUG_ON(!cpu_has_xsavec != !(eax & XSTATE_FEATURE_XSAVEC)); + /* XXX BUG_ON(!cpu_has_xgetbv1 != !(eax & XSTATE_FEATURE_XGETBV1)); */ + /* XXX BUG_ON(!cpu_has_xsaves != !(eax & XSTATE_FEATURE_XSAVES)); */ + } } unsigned int xstate_ctxt_size(u64 xcr0) diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/common/bunzip2.c xen-4.4.2/./xen/common/bunzip2.c --- xen-4.4.1/./xen/common/bunzip2.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/common/bunzip2.c 2015-03-19 16:06:48.000000000 +0100 @@ -174,7 +174,7 @@ static int INIT get_next_block(struct bu if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; origPtr = get_bits(bd, 24); - if (origPtr > dbufSize) + if (origPtr >= dbufSize) return RETVAL_DATA_ERROR; /* mapping table: if some byte values are never used (encoding things like ascii text), the compression code removes the gaps to have fewer diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/common/cpupool.c xen-4.4.2/./xen/common/cpupool.c --- xen-4.4.1/./xen/common/cpupool.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/common/cpupool.c 2015-03-19 16:06:48.000000000 +0100 @@ -225,6 +225,35 @@ static int cpupool_destroy(struct cpupoo } /* + * Move domain to another cpupool + */ +static int cpupool_move_domain_locked(struct domain *d, struct cpupool *c) +{ + int ret; + + d->cpupool->n_dom--; + ret = sched_move_domain(d, c); + if ( ret ) + d->cpupool->n_dom++; + else + c->n_dom++; + + return ret; +} +int cpupool_move_domain(struct domain *d, struct cpupool *c) +{ + int ret; + + spin_lock(&cpupool_lock); + + ret = cpupool_move_domain_locked(d, c); + + spin_unlock(&cpupool_lock); + + return ret; +} + +/* * assign a specific cpu to a cpupool * cpupool_lock must be held */ @@ -338,14 +367,9 @@ int cpupool_unassign_cpu(struct cpupool ret = -EBUSY; break; } - c->n_dom--; - ret = sched_move_domain(d, cpupool0); + ret = cpupool_move_domain_locked(d, cpupool0); if ( ret ) - { - c->n_dom++; break; - } - cpupool0->n_dom++; } rcu_read_unlock(&domlist_read_lock); if ( ret ) @@ -613,16 +637,11 @@ int cpupool_do_sysctl(struct xen_sysctl_ d->domain_id, op->cpupool_id); ret = -ENOENT; spin_lock(&cpupool_lock); + c = cpupool_find_by_id(op->cpupool_id); if ( (c != NULL) && cpumask_weight(c->cpu_valid) ) - { - d->cpupool->n_dom--; - ret = sched_move_domain(d, c); - if ( ret ) - d->cpupool->n_dom++; - else - c->n_dom++; - } + ret = cpupool_move_domain_locked(d, c); + spin_unlock(&cpupool_lock); cpupool_dprintk("cpupool move_domain(dom=%d)->pool=%d ret %d\n", d->domain_id, op->cpupool_id, ret); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/common/domain.c xen-4.4.2/./xen/common/domain.c --- xen-4.4.1/./xen/common/domain.c 2015-07-20 11:32:57.000000000 +0200 +++ xen-4.4.2/./xen/common/domain.c 2015-03-19 16:06:48.000000000 +0100 @@ -538,7 +538,7 @@ int domain_kill(struct domain *d) { break; } - if ( sched_move_domain(d, cpupool0) ) + if ( cpupool_move_domain(d, cpupool0) ) return -EAGAIN; for_each_vcpu ( d, v ) unmap_vcpu_info(v); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/common/domctl.c xen-4.4.2/./xen/common/domctl.c --- xen-4.4.1/./xen/common/domctl.c 2015-07-20 
11:32:57.000000000 +0200 +++ xen-4.4.2/./xen/common/domctl.c 2015-07-20 11:39:03.643092156 +0200 @@ -154,6 +154,7 @@ void getdomaininfo(struct domain *d, str struct vcpu_runstate_info runstate; info->domain = d->domain_id; + info->max_vcpu_id = XEN_INVALID_MAX_VCPU_ID; info->nr_online_vcpus = 0; info->ssidref = 0; @@ -779,17 +780,21 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe case XEN_DOMCTL_irq_permission: { - unsigned int pirq = op->u.irq_permission.pirq; + unsigned int pirq = op->u.irq_permission.pirq, irq; int allow = op->u.irq_permission.allow_access; - if ( pirq >= d->nr_pirqs ) + if ( pirq >= current->domain->nr_pirqs ) + { ret = -EINVAL; - else if ( xsm_irq_permission(XSM_HOOK, d, pirq, allow) ) + break; + } + irq = pirq_access_permitted(current->domain, pirq); + if ( !irq || xsm_irq_permission(XSM_HOOK, d, irq, allow) ) ret = -EPERM; else if ( allow ) - ret = pirq_permit_access(d, pirq); + ret = irq_permit_access(d, irq); else - ret = pirq_deny_access(d, pirq); + ret = irq_deny_access(d, irq); } break; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/common/event_channel.c xen-4.4.2/./xen/common/event_channel.c --- xen-4.4.1/./xen/common/event_channel.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/common/event_channel.c 2015-03-19 16:06:48.000000000 +0100 @@ -1140,21 +1140,25 @@ int alloc_unbound_xen_event_channel( spin_lock(&d->event_lock); - if ( (port = get_free_port(d)) < 0 ) + rc = get_free_port(d); + if ( rc < 0 ) goto out; + port = rc; chn = evtchn_from_port(d, port); rc = xsm_evtchn_unbound(XSM_TARGET, d, chn, remote_domid); + if ( rc ) + goto out; chn->state = ECS_UNBOUND; chn->xen_consumer = get_xen_consumer(notification_fn); chn->notify_vcpu_id = local_vcpu->vcpu_id; - chn->u.unbound.remote_domid = !rc ? remote_domid : DOMID_INVALID; + chn->u.unbound.remote_domid = remote_domid; out: spin_unlock(&d->event_lock); - return port; + return rc < 0 ? rc : port; } diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/common/hvm/save.c xen-4.4.2/./xen/common/hvm/save.c --- xen-4.4.1/./xen/common/hvm/save.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/common/hvm/save.c 2015-03-19 16:06:48.000000000 +0100 @@ -292,19 +292,22 @@ int _hvm_check_entry(struct hvm_domain_c { struct hvm_save_descriptor *d = (struct hvm_save_descriptor *)&h->data[h->cur]; - if ( len + sizeof (*d) > h->size - h->cur) + if ( sizeof(*d) > h->size - h->cur) { printk(XENLOG_G_WARNING - "HVM restore: not enough data left to read %u bytes " - "for type %u\n", len, type); + "HVM restore: not enough data left to read %zu bytes " + "for type %u header\n", sizeof(*d), type); return -1; - } - if ( (type != d->typecode) || (len < d->length) || - (strict_length && (len != d->length)) ) + } + if ( (type != d->typecode) || + (strict_length ? (len != d->length) : (len < d->length)) || + (d->length > (h->size - h->cur - sizeof(*d))) ) { printk(XENLOG_G_WARNING - "HVM restore mismatch: expected type %u length %u, " - "saw type %u length %u\n", type, len, d->typecode, d->length); + "HVM restore mismatch: expected %s type %u length %u, " + "saw type %u length %u. %zu bytes remaining\n", + strict_length ? 
"strict" : "zeroextended", type, len, + d->typecode, d->length, h->size - h->cur - sizeof(*d)); return -1; } h->cur += sizeof(*d); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/common/keyhandler.c xen-4.4.2/./xen/common/keyhandler.c --- xen-4.4.1/./xen/common/keyhandler.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/common/keyhandler.c 2015-03-19 16:06:48.000000000 +0100 @@ -254,6 +254,9 @@ static void dump_domains(unsigned char k for_each_domain ( d ) { unsigned int i; + + process_pending_softirqs(); + printk("General information for domain %u:\n", d->domain_id); cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask); printk(" refcnt=%d dying=%d pause_count=%d\n", @@ -288,6 +291,9 @@ static void dump_domains(unsigned char k d->domain_id); for_each_vcpu ( d, v ) { + if ( !(v->vcpu_id & 0x3f) ) + process_pending_softirqs(); + printk(" VCPU%d: CPU%d [has=%c] poll=%d " "upcall_pend = %02x, upcall_mask = %02x ", v->vcpu_id, v->processor, @@ -310,6 +316,9 @@ static void dump_domains(unsigned char k { for_each_vcpu ( d, v ) { + if ( !(v->vcpu_id & 0x3f) ) + process_pending_softirqs(); + printk("Notifying guest %d:%d (virq %d, port %d)\n", d->domain_id, v->vcpu_id, VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG]); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/common/memory.c xen-4.4.2/./xen/common/memory.c --- xen-4.4.1/./xen/common/memory.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/common/memory.c 2015-03-19 16:06:48.000000000 +0100 @@ -744,11 +744,10 @@ long do_memory_op(unsigned long cmd, XEN return start_extent; args.domain = d; - rc = xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d); - if ( rc ) + if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) ) { rcu_unlock_domain(d); - return rc; + return start_extent; } switch ( op ) diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/drivers/char/dt-uart.c xen-4.4.2/./xen/drivers/char/dt-uart.c --- xen-4.4.1/./xen/drivers/char/dt-uart.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/drivers/char/dt-uart.c 2015-03-19 16:06:48.000000000 +0100 @@ -26,13 +26,13 @@ /* * Configure UART port with a string: - * path,options + * path:options * * @path: full path used in the device tree for the UART. If the path * doesn't start with '/', we assuming that it's an alias. 
* @options: UART speficic options (see in each UART driver) */ -static char __initdata opt_dtuart[30] = ""; +static char __initdata opt_dtuart[256] = ""; string_param("dtuart", opt_dtuart); void __init dt_uart_init(void) @@ -48,7 +48,7 @@ void __init dt_uart_init(void) return; } - options = strchr(opt_dtuart, ','); + options = strchr(opt_dtuart, ':'); if ( options != NULL ) *(options++) = '\0'; else diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/drivers/passthrough/amd/iommu_guest.c xen-4.4.2/./xen/drivers/passthrough/amd/iommu_guest.c --- xen-4.4.1/./xen/drivers/passthrough/amd/iommu_guest.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/drivers/passthrough/amd/iommu_guest.c 2015-03-19 16:06:48.000000000 +0100 @@ -885,6 +885,7 @@ int guest_iommu_init(struct domain* d) } guest_iommu_reg_init(iommu); + iommu->mmio_base = ~0ULL; iommu->domain = d; hd->g_iommu = iommu; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/drivers/passthrough/amd/pci_amd_iommu.c xen-4.4.2/./xen/drivers/passthrough/amd/pci_amd_iommu.c --- xen-4.4.1/./xen/drivers/passthrough/amd/pci_amd_iommu.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/drivers/passthrough/amd/pci_amd_iommu.c 2015-03-19 16:06:48.000000000 +0100 @@ -19,6 +19,7 @@ */ #include +#include #include #include #include @@ -283,6 +284,7 @@ static int amd_iommu_domain_init(struct static void __init amd_iommu_dom0_init(struct domain *d) { unsigned long i; + const struct amd_iommu *iommu; if ( !iommu_passthrough && !need_iommu(d) ) { @@ -304,6 +306,12 @@ static void __init amd_iommu_dom0_init(s } } + for_each_amd_iommu ( iommu ) + if ( iomem_deny_access(d, PFN_DOWN(iommu->mmio_base_phys), + PFN_DOWN(iommu->mmio_base_phys + + IOMMU_MMIO_REGION_LENGTH - 1)) ) + BUG(); + setup_dom0_pci_devices(d, amd_iommu_setup_dom0_device); } diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/drivers/passthrough/vtd/iommu.c xen-4.4.2/./xen/drivers/passthrough/vtd/iommu.c --- xen-4.4.1/./xen/drivers/passthrough/vtd/iommu.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/drivers/passthrough/vtd/iommu.c 2015-03-19 16:06:48.000000000 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -258,8 +259,7 @@ static u64 addr_to_dma_page_maddr(struct struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(hd->agaw); int offset; - u64 pte_maddr = 0, maddr; - u64 *vaddr = NULL; + u64 pte_maddr = 0; addr &= (((u64)1) << addr_width) - 1; ASSERT(spin_is_locked(&hd->mapping_lock)); @@ -281,19 +281,19 @@ static u64 addr_to_dma_page_maddr(struct offset = address_level_offset(addr, level); pte = &parent[offset]; - if ( dma_pte_addr(*pte) == 0 ) + pte_maddr = dma_pte_addr(*pte); + if ( !pte_maddr ) { if ( !alloc ) break; pdev = pci_get_pdev_by_domain(domain, -1, -1, -1); drhd = acpi_find_matched_drhd_unit(pdev); - maddr = alloc_pgtable_maddr(drhd, 1); - if ( !maddr ) + pte_maddr = alloc_pgtable_maddr(drhd, 1); + if ( !pte_maddr ) break; - dma_set_pte_addr(*pte, maddr); - vaddr = map_vtd_domain_page(maddr); + dma_set_pte_addr(*pte, pte_maddr); /* * high level table always sets r/w, last level @@ -303,21 +303,12 @@ static u64 addr_to_dma_page_maddr(struct dma_set_pte_writable(*pte); iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); } - else - { - vaddr = map_vtd_domain_page(pte->val); - } if ( level == 2 ) - { - pte_maddr = pte->val & PAGE_MASK_4K; - unmap_vtd_domain_page(vaddr); break; - } unmap_vtd_domain_page(parent); - parent = (struct dma_pte *)vaddr; - vaddr = NULL; + parent = 
map_vtd_domain_page(pte_maddr); level--; } @@ -1259,6 +1250,9 @@ static void __init intel_iommu_dom0_init for_each_drhd_unit ( drhd ) { + if ( iomem_deny_access(d, PFN_DOWN(drhd->address), + PFN_DOWN(drhd->address)) ) + BUG(); iommu_enable_translation(drhd); } } @@ -2443,7 +2437,7 @@ static void vtd_dump_p2m_table_level(pad printk("%*sgfn: %08lx mfn: %08lx\n", indent, "", (unsigned long)(address >> PAGE_SHIFT_4K), - (unsigned long)(pte->val >> PAGE_SHIFT_4K)); + (unsigned long)(dma_pte_addr(*pte) >> PAGE_SHIFT_4K)); } unmap_vtd_domain_page(pt_vaddr); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/drivers/passthrough/vtd/iommu.h xen-4.4.2/./xen/drivers/passthrough/vtd/iommu.h --- xen-4.4.1/./xen/drivers/passthrough/vtd/iommu.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/drivers/passthrough/vtd/iommu.h 2015-03-19 16:06:48.000000000 +0100 @@ -276,7 +276,7 @@ struct dma_pte { #define dma_set_pte_snp(p) do {(p).val |= DMA_PTE_SNP;} while(0) #define dma_set_pte_prot(p, prot) \ do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_pte_addr(p) ((p).val & PADDR_MASK & PAGE_MASK_4K) #define dma_set_pte_addr(p, addr) do {\ (p).val |= ((addr) & PAGE_MASK_4K); } while (0) #define dma_pte_present(p) (((p).val & 3) != 0) diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/drivers/passthrough/vtd/utils.c xen-4.4.2/./xen/drivers/passthrough/vtd/utils.c --- xen-4.4.1/./xen/drivers/passthrough/vtd/utils.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/drivers/passthrough/vtd/utils.c 2015-03-19 16:06:48.000000000 +0100 @@ -170,16 +170,16 @@ void print_vtd_entries(struct iommu *iom l_index = get_level_index(gmfn, level); printk(" l%d_index = %x\n", level, l_index); - pte.val = val = l[l_index]; + pte.val = l[l_index]; unmap_vtd_domain_page(l); - printk(" l%d[%x] = %"PRIx64"\n", level, l_index, val); + printk(" l%d[%x] = %"PRIx64"\n", level, l_index, pte.val); - pte.val = val; if ( !dma_pte_present(pte) ) { printk(" l%d[%x] not present\n", level, l_index); break; } + val = dma_pte_addr(pte); } while ( --level ); } diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/drivers/passthrough/x86/ats.c xen-4.4.2/./xen/drivers/passthrough/x86/ats.c --- xen-4.4.1/./xen/drivers/passthrough/x86/ats.c 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/drivers/passthrough/x86/ats.c 2015-03-19 16:06:48.000000000 +0100 @@ -20,7 +20,7 @@ LIST_HEAD(ats_devices); -bool_t __read_mostly ats_enabled = 1; +bool_t __read_mostly ats_enabled = 0; boolean_param("ats", ats_enabled); int enable_ats_device(int seg, int bus, int devfn, const void *iommu) diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/asm-arm/arm64/page.h xen-4.4.2/./xen/include/asm-arm/arm64/page.h --- xen-4.4.1/./xen/include/asm-arm/arm64/page.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/asm-arm/arm64/page.h 2015-03-19 16:06:48.000000000 +0100 @@ -88,9 +88,9 @@ static inline uint64_t gva_to_ma_par(vad uint64_t par, tmp = READ_SYSREG64(PAR_EL1); if ( (flags & GV2M_WRITE) == GV2M_WRITE ) - asm volatile ("at s12e1r, %0;" : : "r" (va)); - else asm volatile ("at s12e1w, %0;" : : "r" (va)); + else + asm volatile ("at s12e1r, %0;" : : "r" (va)); isb(); par = READ_SYSREG64(PAR_EL1); WRITE_SYSREG64(tmp, PAR_EL1); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/asm-x86/apic.h xen-4.4.2/./xen/include/asm-x86/apic.h --- xen-4.4.1/./xen/include/asm-x86/apic.h 2014-09-02 08:20:19.000000000 +0200 
+++ xen-4.4.2/./xen/include/asm-x86/apic.h 2015-03-19 16:06:48.000000000 +0100 @@ -206,7 +206,7 @@ extern void release_lapic_nmi(void); extern void self_nmi(void); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); -extern void nmi_watchdog_tick (struct cpu_user_regs *regs); +extern bool_t nmi_watchdog_tick (struct cpu_user_regs *regs); extern int APIC_init_uniprocessor (void); extern void disable_APIC_timer(void); extern void enable_APIC_timer(void); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/asm-x86/config.h xen-4.4.2/./xen/include/asm-x86/config.h --- xen-4.4.1/./xen/include/asm-x86/config.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/asm-x86/config.h 2015-03-19 16:06:48.000000000 +0100 @@ -101,6 +101,10 @@ /* Return value for zero-size _xmalloc(), distinguished from NULL. */ #define ZERO_BLOCK_PTR ((void *)0xBAD0BAD0BAD0BAD0UL) +/* Override include/xen/list.h to make these non-canonical addresses. */ +#define LIST_POISON1 ((void *)0x0100100100100100UL) +#define LIST_POISON2 ((void *)0x0200200200200200UL) + #ifndef __ASSEMBLY__ extern unsigned long trampoline_phys; #define bootsym_phys(sym) \ diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/asm-x86/hvm/vlapic.h xen-4.4.2/./xen/include/asm-x86/hvm/vlapic.h --- xen-4.4.1/./xen/include/asm-x86/hvm/vlapic.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/asm-x86/hvm/vlapic.h 2015-03-19 16:06:48.000000000 +0100 @@ -58,6 +58,8 @@ #define VEC_POS(v) ((v) % 32) #define REG_POS(v) (((v) / 32) * 0x10) +#define vlapic_test_vector(vec, bitmap) \ + test_bit(VEC_POS(vec), (const uint32_t *)((bitmap) + REG_POS(vec))) #define vlapic_test_and_set_vector(vec, bitmap) \ test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec))) #define vlapic_test_and_clear_vector(vec, bitmap) \ diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/asm-x86/nmi.h xen-4.4.2/./xen/include/asm-x86/nmi.h --- xen-4.4.1/./xen/include/asm-x86/nmi.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/asm-x86/nmi.h 2015-03-19 16:06:48.000000000 +0100 @@ -8,6 +8,9 @@ struct cpu_user_regs; /* Watchdog boolean from the command line */ extern bool_t opt_watchdog; + +/* Watchdog force parameter from the command line */ +extern bool_t watchdog_force; typedef int (*nmi_callback_t)(struct cpu_user_regs *regs, int cpu); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/asm-x86/processor.h xen-4.4.2/./xen/include/asm-x86/processor.h --- xen-4.4.1/./xen/include/asm-x86/processor.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/asm-x86/processor.h 2015-03-19 16:06:48.000000000 +0100 @@ -113,7 +113,7 @@ #define TRAP_alignment_check 17 #define TRAP_machine_check 18 #define TRAP_simd_error 19 -#define TRAP_last_reserved 31 +#define TRAP_nr 32 /* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */ /* NB. Same as VGCF_in_syscall. No bits in common with any other TRAP_ defn. 
*/ @@ -523,6 +523,9 @@ extern void mtrr_bp_init(void); void mcheck_init(struct cpuinfo_x86 *c, bool_t bsp); +/* Dispatch table for exceptions */ +extern void (* const exception_table[TRAP_nr])(struct cpu_user_regs *regs); + #define DECLARE_TRAP_HANDLER(_name) \ void _name(void); \ void do_ ## _name(struct cpu_user_regs *regs) diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/asm-x86/xstate.h xen-4.4.2/./xen/include/asm-x86/xstate.h --- xen-4.4.1/./xen/include/asm-x86/xstate.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/asm-x86/xstate.h 2015-03-19 16:06:48.000000000 +0100 @@ -16,6 +16,9 @@ #define XSTATE_CPUID 0x0000000d #define XSTATE_FEATURE_XSAVEOPT (1 << 0) /* sub-leaf 1, eax[bit 0] */ +#define XSTATE_FEATURE_XSAVEC (1 << 1) /* sub-leaf 1, eax[bit 1] */ +#define XSTATE_FEATURE_XGETBV1 (1 << 2) /* sub-leaf 1, eax[bit 2] */ +#define XSTATE_FEATURE_XSAVES (1 << 3) /* sub-leaf 1, eax[bit 3] */ #define XCR_XFEATURE_ENABLED_MASK 0x00000000 /* index of XCR0 */ @@ -35,6 +38,7 @@ #define XSTATE_LAZY (XSTATE_ALL & ~XSTATE_NONLAZY) extern u64 xfeature_mask; +extern bool_t cpu_has_xsaves, cpu_has_xgetbv1; /* extended state save area */ struct xsave_struct diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/public/domctl.h xen-4.4.2/./xen/include/public/domctl.h --- xen-4.4.1/./xen/include/public/domctl.h 2015-07-20 11:32:57.000000000 +0200 +++ xen-4.4.2/./xen/include/public/domctl.h 2015-07-20 11:39:03.687092156 +0200 @@ -107,6 +107,7 @@ struct xen_domctl_getdomaininfo { uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ uint64_aligned_t cpu_time; uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ +#define XEN_INVALID_MAX_VCPU_ID (~0U) /* Domain has no vcpus? */ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ uint32_t ssidref; xen_domain_handle_t handle; diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/xen/efi.h xen-4.4.2/./xen/include/xen/efi.h --- xen-4.4.1/./xen/include/xen/efi.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/xen/efi.h 2015-03-19 16:06:48.000000000 +0100 @@ -28,6 +28,7 @@ struct xenpf_efi_runtime_call; struct compat_pf_efi_runtime_call; void efi_init_memory(void); +paddr_t efi_rs_page_table(void); unsigned long efi_get_time(void); void efi_halt_system(void); void efi_reset_system(bool_t warm); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/xen/iocap.h xen-4.4.2/./xen/include/xen/iocap.h --- xen-4.4.1/./xen/include/xen/iocap.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/xen/iocap.h 2015-03-19 16:06:48.000000000 +0100 @@ -28,22 +28,11 @@ #define irq_access_permitted(d, i) \ rangeset_contains_singleton((d)->irq_caps, i) -#define pirq_permit_access(d, i) ({ \ - struct domain *d__ = (d); \ - int i__ = domain_pirq_to_irq(d__, i); \ - i__ > 0 ? rangeset_add_singleton(d__->irq_caps, i__)\ - : -EINVAL; \ -}) -#define pirq_deny_access(d, i) ({ \ - struct domain *d__ = (d); \ - int i__ = domain_pirq_to_irq(d__, i); \ - i__ > 0 ? rangeset_remove_singleton(d__->irq_caps, i__)\ - : -EINVAL; \ -}) #define pirq_access_permitted(d, i) ({ \ struct domain *d__ = (d); \ - rangeset_contains_singleton(d__->irq_caps, \ - domain_pirq_to_irq(d__, i));\ + int irq__ = domain_pirq_to_irq(d__, i); \ + irq__ > 0 && irq_access_permitted(d__, irq__) \ + ? 
irq__ : 0; \ }) #endif /* __XEN_IOCAP_H__ */ diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/xen/list.h xen-4.4.2/./xen/include/xen/list.h --- xen-4.4.1/./xen/include/xen/list.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/xen/list.h 2015-03-19 16:06:48.000000000 +0100 @@ -10,12 +10,15 @@ #include #include -/* These are non-NULL pointers that will result in page faults - * under normal circumstances, used to verify that nobody uses - * non-initialized list entries. +/* + * These are non-NULL pointers that will result in faults under normal + * circumstances, used to verify that nobody uses non-initialized list + * entries. Architectures can override these. */ +#ifndef LIST_POISON1 #define LIST_POISON1 ((void *) 0x00100100) #define LIST_POISON2 ((void *) 0x00200200) +#endif /* * Simple doubly linked list implementation. diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/include/xen/sched.h xen-4.4.2/./xen/include/xen/sched.h --- xen-4.4.1/./xen/include/xen/sched.h 2014-09-02 08:20:19.000000000 +0200 +++ xen-4.4.2/./xen/include/xen/sched.h 2015-03-19 16:06:48.000000000 +0100 @@ -828,6 +828,7 @@ struct cpupool *cpupool_get_by_id(int po void cpupool_put(struct cpupool *pool); int cpupool_add_domain(struct domain *d, int poolid); void cpupool_rm_domain(struct domain *d); +int cpupool_move_domain(struct domain *d, struct cpupool *c); int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op); void schedule_dump(struct cpupool *c); extern void dump_runq(unsigned char key); diff -Nurp '--exclude=debian' '--exclude=.pc' xen-4.4.1/./xen/Makefile xen-4.4.2/./xen/Makefile --- xen-4.4.1/./xen/Makefile 2015-07-20 11:32:57.000000000 +0200 +++ xen-4.4.2/./xen/Makefile 2015-07-20 11:39:02.455092175 +0200 @@ -2,7 +2,7 @@ # All other places this is stored (eg. compile.h) should be autogenerated. export XEN_VERSION = 4 export XEN_SUBVERSION = 4 -export XEN_EXTRAVERSION ?= .1$(XEN_VENDORVERSION) +export XEN_EXTRAVERSION ?= .2$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version
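
Note on the NMI watchdog changes (xen/arch/x86/traps.c and xen/include/asm-x86/nmi.h above): nmi_watchdog_tick() now returns whether the NMI was accounted for as a watchdog tick, and do_nmi() uses that together with the new watchdog_force flag to decide whether an otherwise-unattributed NMI should still be reported. The following stand-alone helper is only an illustration of that decision, under the assumption that the three inputs mirror nmi_watchdog, watchdog_force and the tick return value; nmi_should_handle_unknown() is a hypothetical name and not a Xen function.

#include <stdbool.h>

/* Sketch of the unknown-NMI decision added for "watchdog=force".
 * Inputs stand in for nmi_watchdog, watchdog_force and the value
 * returned by nmi_watchdog_tick(). */
bool nmi_should_handle_unknown(bool nmi_watchdog_enabled,
                               bool watchdog_force,
                               bool watchdog_claimed_tick)
{
    /* Watchdog off: unknown NMIs are always decoded (pre-existing
     * behaviour).  With watchdog=force, NMIs the watchdog did not
     * account for are reported as well instead of being swallowed. */
    if (!nmi_watchdog_enabled)
        return true;
    return watchdog_force && !watchdog_claimed_tick;
}
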
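
Note on the cpupool changes (xen/common/cpupool.c, domain.c and domctl.c above): the patch consolidates the "move a domain to another cpupool" bookkeeping into a single locked helper, cpupool_move_domain_locked(), wrapped by a public cpupool_move_domain() that takes cpupool_lock, so that domain destruction, CPU unassignment and the sysctl path all adjust the per-pool domain counts in one place. The sketch below shows that pattern in isolation; it is a simplified model, not the hypervisor code — a pthread mutex stands in for Xen's spinlock, the structs are stripped down, and sched_move_domain() is a stub that is assumed to leave d->cpupool untouched when it fails.

#include <pthread.h>
#include <stdio.h>

struct cpupool { int n_dom; };
struct domain  { struct cpupool *cpupool; };

static pthread_mutex_t cpupool_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for Xen's sched_move_domain(): on success it repoints the
 * domain at the new pool and returns 0; on failure it must leave
 * d->cpupool unchanged so the caller's rollback is correct. */
static int sched_move_domain(struct domain *d, struct cpupool *c)
{
    d->cpupool = c;
    return 0;
}

/* Caller must hold cpupool_lock: adjust the counts around the move so
 * a failed move leaves both pools' n_dom unchanged. */
static int cpupool_move_domain_locked(struct domain *d, struct cpupool *c)
{
    int ret;

    d->cpupool->n_dom--;
    ret = sched_move_domain(d, c);
    if (ret)
        d->cpupool->n_dom++;   /* roll back: still the old pool on failure */
    else
        c->n_dom++;
    return ret;
}

int cpupool_move_domain(struct domain *d, struct cpupool *c)
{
    int ret;

    pthread_mutex_lock(&cpupool_lock);
    ret = cpupool_move_domain_locked(d, c);
    pthread_mutex_unlock(&cpupool_lock);
    return ret;
}

int main(void)
{
    struct cpupool pool0 = { .n_dom = 1 }, pool1 = { .n_dom = 0 };
    struct domain d = { .cpupool = &pool0 };

    if (cpupool_move_domain(&d, &pool1) == 0)
        printf("moved: pool0.n_dom=%d pool1.n_dom=%d\n",
               pool0.n_dom, pool1.n_dom);
    return 0;
}

Keeping the count adjustment next to the move inside one lock-holding helper is what lets the three call sites in the patch shrink to a single call and avoids the duplicated n_dom-- / n_dom++ rollback logic they previously each carried.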