Comment 1 for bug 1743637

Revision history for this message
Rafael David Tinoco (rafaeldtinoco) wrote :

All the other threads were shutting down after VM_EXIT

#0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
#1 0x00007fe48e71b3f8 in _L_cond_lock_886 () from /lib/x86_64-linux-gnu/libpthread.so.0
#2 0x00007fe48e71b164 in __pthread_mutex_cond_lock (mutex=0x556a92b60800 <qemu_global_mutex>)
    at ../nptl/pthread_mutex_lock.c:79
#3 0x00007fe48e715494 in pthread_cond_wait@@GLIBC_2.3.2 ()
    at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:259
#4 0x0000556a925bd549 in qemu_cond_wait (cond=<optimized out>,
    mutex=mutex@entry=0x556a92b60800 <qemu_global_mutex>)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/util/qemu-thread-posix.c:132
#5 0x0000556a922898db in qemu_kvm_wait_io_event (cpu=<optimized out>)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/cpus.c:1016

The failing thread aborted on a failed assertion:

(gdb) bt
#0 0x00007fe48e376c37 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1 0x00007fe48e37a028 in __GI_abort () at abort.c:89
#2 0x00007fe48e36fbf6 in __assert_fail_base (
    fmt=0x7fe48e4c4018 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n",
    assertion=assertion@entry=0x556a926552a8 "ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER", file=file@entry=0x556a926551c8 "/build/qemu-KH8VkI/qemu-2.5+dfsg/net/vhost-user.c", line=line@entry=50,
    function=function@entry=0x556a92655440 <__PRETTY_FUNCTION__.31032> "vhost_user_stop") at assert.c:92
#3 0x00007fe48e36fca2 in __GI___assert_fail (
    assertion=assertion@entry=0x556a926552a8 "ncs[i]->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER", file=file@entry=0x556a926551c8 "/build/qemu-KH8VkI/qemu-2.5+dfsg/net/vhost-user.c", line=line@entry=50,
    function=function@entry=0x556a92655440 <__PRETTY_FUNCTION__.31032> "vhost_user_stop")
    at assert.c:101
#4 0x0000556a924f07e1 in vhost_user_stop (queues=<optimized out>, ncs=<optimized out>)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/net/vhost-user.c:50
#5 0x0000556a924f089a in net_vhost_user_event (opaque=0x556a934b15c0, event=5)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/net/vhost-user.c:192
#6 0x0000556a9237fb4f in tcp_chr_disconnect (chr=0x556a934b0900)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/qemu-char.c:2873
#7 0x0000556a9237fc09 in tcp_chr_sync_read (chr=0x556a934b0900, buf=<optimized out>,
    len=<optimized out>) at /build/qemu-KH8VkI/qemu-2.5+dfsg/qemu-char.c:2920
#8 0x0000556a923814cd in qemu_chr_fe_read_all (s=s@entry=0x556a934b0900,
    buf=buf@entry=0x7ffe4d074bf0 "\v", len=len@entry=12)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/qemu-char.c:239
#9 0x0000556a922ecbf8 in vhost_user_read (msg=msg@entry=0x7ffe4d074bf0, dev=0x556a934dd6d0)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/hw/virtio/vhost-user.c:122
#10 0x0000556a922ed00b in vhost_user_get_vring_base (dev=0x556a934dd6d0, ring=0x7ffe4d074d30)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/hw/virtio/vhost-user.c:366
#11 0x0000556a922e8ed0 in vhost_virtqueue_stop (dev=dev@entry=0x556a934dd6d0,
    vdev=vdev@entry=0x556a93bca458, vq=0x556a934dd808, idx=0)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/hw/virtio/vhost.c:895
#12 0x0000556a922eb824 in vhost_dev_stop (hdev=hdev@entry=0x556a934dd6d0,
    vdev=vdev@entry=0x556a93bca458) at /build/qemu-KH8VkI/qemu-2.5+dfsg/hw/virtio/vhost.c:1262
#13 0x0000556a922d41d8 in vhost_net_stop_one (net=0x556a934dd6d0, dev=dev@entry=0x556a93bca458)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/hw/net/vhost_net.c:293
#14 0x0000556a922d4d8b in vhost_net_stop (dev=dev@entry=0x556a93bca458, ncs=0x556a93ef08e0,
    total_queues=total_queues@entry=1) at /build/qemu-KH8VkI/qemu-2.5+dfsg/hw/net/vhost_net.c:371
#15 0x0000556a922d0675 in virtio_net_vhost_status (status=7 '\a', n=0x556a93bca458)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/hw/net/virtio-net.c:150
#16 virtio_net_set_status (vdev=<optimized out>, status=<optimized out>)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/hw/net/virtio-net.c:162
#17 0x0000556a924e87bc in qemu_del_net_client (nc=0x556a934dd550)
    at /build/qemu-KH8VkI/qemu-2.5+dfsg/net/net.c:418
#18 0x0000556a924e973d in net_cleanup () at /build/qemu-KH8VkI/qemu-2.5+dfsg/net/net.c:1370
#19 0x00007fe48e37c1a9 in __run_exit_handlers (status=0, listp=0x7fe48e7026c8 <__exit_funcs>,
    run_list_atexit=run_list_atexit@entry=true) at exit.c:82
#20 0x00007fe48e37c1f5 in __GI_exit (status=<optimized out>) at exit.c:104
#21 0x00007fe48e361f4c in __libc_start_main (main=0x556a922545a0 <main>, argc=90, argv=0x7ffe4d077008,
    init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffe4d076ff8)
    at libc-start.c:321
#22 0x0000556a9225c791 in _start ()

The assert showed that the vhost NIC being turned off (and having its resources freed, after asking the other peer for the vring base index) was not of type NET_CLIENT_OPTIONS_KIND_VHOST_USER, but was a TAP one.

This is likely because of the use-after-free, where the NIC name being used by the net_vhost_user_event() callback (called from s->chr_event() for CHR_EVENT_CLOSED) was different from what it should have been. The stack had chardev2 but the callback had chardev4 (the name is the index used to find the NIC to be released, and it was being used wrongly, which suggests this fixes the issue).