Comment 1 for bug 1807052

Revision history for this message
Matthew Schumacher (schu-schu) wrote :

If I remove iothreads and writeback caching, it seems more reliable, but I can still get it to hang.

This time the source server shows the VM as running, and the backtrace looks like this:

(gdb) bt full
#0 0x00007f27eab0028c in __lll_lock_wait () at /lib64/libpthread.so.0
#1 0x00007f27eaaf9d35 in pthread_mutex_lock () at /lib64/libpthread.so.0
#2 0x0000000000865419 in qemu_mutex_lock_impl (mutex=mutex@entry=0x115b8e0 <qemu_global_mutex>, file=file@entry=0x8fdf14 "/tmp/qemu-3.0.0/cpus.c", line=line@entry=1768)
    at util/qemu-thread-posix.c:66
        err = <optimized out>
        __PRETTY_FUNCTION__ = "qemu_mutex_lock_impl"
        __func__ = "qemu_mutex_lock_impl"
#3 0x0000000000477578 in qemu_mutex_lock_iothread () at /tmp/qemu-3.0.0/cpus.c:1768
#4 0x00000000008622b0 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:236
        context = 0x1e72810
        ret = 1
        ret = 1
        timeout = 4294967295
        timeout_ns = <optimized out>
#5 0x00000000008622b0 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497
        ret = 1
        timeout = 4294967295
        timeout_ns = <optimized out>
#6 0x0000000000595dee in main_loop () at vl.c:1866
#7 0x000000000041f35d in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4644
        i = <optimized out>
        snapshot = 0
        linux_boot = <optimized out>
        initrd_filename = 0x0
        kernel_filename = <optimized out>
        kernel_cmdline = <optimized out>
        boot_order = 0x918f44 "cad"
        boot_once = 0x0
        ds = <optimized out>
        opts = <optimized out>
        machine_opts = <optimized out>
        icount_opts = <optimized out>
        accel_opts = 0x0
        olist = <optimized out>
        optind = 71
        optarg = 0x7fff5edcff69 "timestamp=on"
        loadvm = 0x0
        machine_class = 0x0
        cpu_model = 0x7fff5edcf88a "Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer"...
        vga_model = 0x0
        qtest_chrdev = 0x0
        qtest_log = 0x0
        pid_file = <optimized out>
        incoming = 0x7fff5edcff0a "defer"
        userconfig = <optimized out>
        nographic = false
        display_remote = <optimized out>
        log_mask = <optimized out>
        log_file = <optimized out>
        trace_file = <optimized out>
        maxram_size = 4294967296
        ram_slots = 0
        vmstate_dump_file = 0x0
        main_loop_err = 0x0
---Type <return> to continue, or q <return> to quit---
        err = 0x0
        list_data_dirs = false
        dir = <optimized out>
        dirs = <optimized out>
        bdo_queue = {sqh_first = 0x0, sqh_last = 0x7fff5edcd670}
        __func__ = "main"

The destination server is paused, and its backtrace looks like this:

#0 0x00007f11c48bc3c1 in ppoll () at /lib64/libc.so.6
#1 0x0000000000861659 in qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, timeout=timeout@entry=2999892383) at util/qemu-timer.c:334
        ts = {tv_sec = 2, tv_nsec = 999892383}
Python Exception <class 'gdb.error'> That operation is not available on integers of more than 8 bytes.:
#2 0x00000000008622a4 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:233
        context = 0x2342810
        ret = <optimized out>
        ret = -1295074913
        timeout = 4294967295
        timeout_ns = <optimized out>
#3 0x00000000008622a4 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497
        ret = -1295074913
        timeout = 4294967295
        timeout_ns = <optimized out>
#4 0x0000000000595dee in main_loop () at vl.c:1866
#5 0x000000000041f35d in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4644
        i = <optimized out>
        snapshot = 0
        linux_boot = <optimized out>
        initrd_filename = 0x0
        kernel_filename = <optimized out>
        kernel_cmdline = <optimized out>
        boot_order = 0x918f44 "cad"
        boot_once = 0x0
        ds = <optimized out>
        opts = <optimized out>
        machine_opts = <optimized out>
        icount_opts = <optimized out>
        accel_opts = 0x0
        olist = <optimized out>
        optind = 71
        optarg = 0x7ffe6b899f69 "timestamp=on"
        loadvm = 0x0
        machine_class = 0x0
        cpu_model = 0x7ffe6b89988a "Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer"...
        vga_model = 0x0
        qtest_chrdev = 0x0
        qtest_log = 0x0
        pid_file = <optimized out>
        incoming = 0x7ffe6b899f0a "defer"
        userconfig = <optimized out>
        nographic = false
        display_remote = <optimized out>
        log_mask = <optimized out>
        log_file = <optimized out>
        trace_file = <optimized out>
        maxram_size = 4294967296
        ram_slots = 0
        vmstate_dump_file = 0x0
        main_loop_err = 0x0
        err = 0x0
        list_data_dirs = false
        dir = <optimized out>
        dirs = <optimized out>
        bdo_queue = {sqh_first = 0x0, sqh_last = 0x7ffe6b8988e0}
---Type <return> to continue, or q <return> to quit---
        __func__ = "main"

Honestly, this looks pretty much like the same bug.