qemu crashes with assertion error `off_cur_end >= off_cur' failed

Bug #1729501 reported by Liang Dai
6
This bug affects 1 person
Affects Status Importance Assigned to Milestone
QEMU
Fix Released
Undecided
Unassigned

Bug Description

My host environment: Xen + QEMU

I cloned today's Xen and QEMU git repositories (2017-11-02):

xen -- git://xenbits.xen.org/xen.git
commit 24fb44e971a62b345c7b6ca3c03b454a1e150abe

qemu -- https://github.com/qemu/qemu
commit 47ba789c97c8d201d01058b00a14d8a9a85fcfe9

QEMU was compiled using:
./configure --prefix=/mnt/bin/ --enable-xen --target-list=i386-softmmu --extra-cflags="-I/mnt/xen/tools/include -I/mnt/xen/tools/libxc -I/mnt/xen/tools/xenstore" --extra-ldflags="-L/mnt/xen/tools/libxc -L/mnt/xen/tools/xenstore" --enable-debug --enable-debug-stack-usage

Xen was configured with the above QEMU distribution:
./configure --with-system-qemu=/mnt/bin/bin/qemu-system-i386

QEMU command line:
/mnt/bin/bin/qemu-system-i386 -xen-domid 28 -chardev socket,id=libxl-cmd,path=/var/run/xen/qmp-libxl-28,server,nowait -no-shutdown -mon chardev=libxl-cmd,mode=control -chardev socket,id=libxenstat-cmd,path=/var/run/xen/qmp-libxenstat-28,server,nowait -mon chardev=libxenstat-cmd,mode=control -nodefaults -no-user-config -name ubu_hvm -vnc 0.0.0.0:1,to=99 -display none -serial pty -device cirrus-vga,vgamem_mb=8 -boot order=c -smp 2,maxcpus=2 -device rtl8139,id=nic0,netdev=net0,mac=00:16:3e:74:34:32 -netdev type=tap,id=net0,ifname=vif28.0-emu,script=no,downscript=no -device rtl8139,id=nic1,netdev=net1,mac=00:16:3e:5f:48:e4 -netdev type=tap,id=net1,ifname=vif28.1-emu,script=no,downscript=no -machine xenfv -m 1504 -drive file=/mnt/10G.hdd,if=ide,index=0,media=disk,format=raw,cache=writeback

Steps to reproduce:
I run a fuzzer program in the guest VM; it may set incorrect values for graphics registers, sequencer registers, and other registers.

Seeing the following error from /var/log/xen/qemu-dm-<vm-name>.log:
qemu-system-i386: hw/display/cirrus_vga.c:712: cirrus_invalidate_region: Assertion `off_cur_end >= off_cur' failed.

I can reproduce it at any time. If you need to gather more diagnostic information or try test patches, I'm happy to help.

gdb bt:
#0 0x00007f81a64f8c37 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1 0x00007f81a64fc028 in __GI_abort () at abort.c:89
#2 0x00007f81a64f1bf6 in __assert_fail_base (fmt=0x7f81a6646018 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n",
    assertion=assertion@entry=0x55d70cf8cdf2 "off_cur_end >= off_cur", file=file@entry=0x55d70cf8cda9 "hw/display/cirrus_vga.c", line=line@entry=712,
    function=function@entry=0x55d70cf8db60 <__PRETTY_FUNCTION__.40643> "cirrus_invalidate_region") at assert.c:92
#3 0x00007f81a64f1ca2 in __GI___assert_fail (assertion=0x55d70cf8cdf2 "off_cur_end >= off_cur", file=0x55d70cf8cda9 "hw/display/cirrus_vga.c", line=712,
    function=0x55d70cf8db60 <__PRETTY_FUNCTION__.40643> "cirrus_invalidate_region") at assert.c:101
#4 0x000055d70cb66445 in cirrus_invalidate_region (s=0x55d70ee3a4b0, off_begin=4190568, off_pitch=1842, bytesperline=5056, lines=2922) at hw/display/cirrus_vga.c:712
#5 0x000055d70cb6660c in cirrus_bitblt_common_patterncopy (s=0x55d70ee3a4b0) at hw/display/cirrus_vga.c:752
#6 0x000055d70cb6676d in cirrus_bitblt_videotovideo_patterncopy (s=0x55d70ee3a4b0) at hw/display/cirrus_vga.c:786
#7 0x000055d70cb670c5 in cirrus_bitblt_videotovideo (s=0x55d70ee3a4b0) at hw/display/cirrus_vga.c:986
#8 0x000055d70cb678bf in cirrus_bitblt_start (s=0x55d70ee3a4b0) at hw/display/cirrus_vga.c:1136
#9 0x000055d70cb6880b in cirrus_vga_write_gr (s=0x55d70ee3a4b0, reg_index=42, reg_value=228) at hw/display/cirrus_vga.c:1652
#10 0x000055d70cb6ab86 in cirrus_vga_ioport_write (opaque=0x55d70ee3a4b0, addr=975, val=228, size=1) at hw/display/cirrus_vga.c:2754
#11 0x000055d70c91d9c0 in memory_region_write_accessor (mr=0x55d70ee4af70, addr=31, value=0x7fffdaaeaf38, size=1, shift=8, mask=255, attrs=...)
    at /mnt/qemu/memory.c:560
#12 0x000055d70c91dc3a in access_with_adjusted_size (addr=30, value=0x7fffdaaeaf38, size=2, access_size_min=1, access_size_max=1,
    access_fn=0x55d70c91d8c9 <memory_region_write_accessor>, mr=0x55d70ee4af70, attrs=...) at /mnt/qemu/memory.c:627
#13 0x000055d70c920f48 in memory_region_dispatch_write (mr=0x55d70ee4af70, addr=30, data=58410, size=2, attrs=...) at /mnt/qemu/memory.c:1503
#14 0x000055d70c8c51e0 in flatview_write_continue (fv=0x55d70ecb66d0, addr=974, attrs=..., buf=0x7fffdaaeb0f0 "*\344W\026\377\177", len=4, addr1=30, l=2,
    mr=0x55d70ee4af70) at /mnt/qemu/exec.c:2951
#15 0x000055d70c8c5390 in flatview_write (fv=0x55d70ecb66d0, addr=974, attrs=..., buf=0x7fffdaaeb0f0 "*\344W\026\377\177", len=4) at /mnt/qemu/exec.c:3002
#16 0x000055d70c8c5406 in address_space_write (as=0x55d70d70d5e0 <address_space_io>, addr=974, attrs=..., buf=0x7fffdaaeb0f0 "*\344W\026\377\177", len=4)
    at /mnt/qemu/exec.c:3014
#17 0x000055d70c914fb3 in cpu_outl (addr=974, val=374858794) at /mnt/qemu/ioport.c:81
#18 0x000055d70ca0253f in do_outp (addr=974, size=4, val=374858794) at /mnt/qemu/hw/i386/xen/xen-hvm.c:782
#19 0x000055d70ca02888 in cpu_ioreq_pio (req=0x7fffdaaeb210) at /mnt/qemu/hw/i386/xen/xen-hvm.c:852
#20 0x000055d70ca02f2e in handle_ioreq (state=0x55d70e0cf3d0, req=0x7fffdaaeb210) at /mnt/qemu/hw/i386/xen/xen-hvm.c:961
#21 0x000055d70ca0343e in cpu_handle_ioreq (opaque=0x55d70e0cf3d0) at /mnt/qemu/hw/i386/xen/xen-hvm.c:1089
#22 0x000055d70ce75d69 in aio_dispatch_handlers (ctx=0x55d70e098550) at util/aio-posix.c:406
#23 0x000055d70ce75f0b in aio_dispatch (ctx=0x55d70e098550) at util/aio-posix.c:437
#24 0x000055d70ce70b46 in aio_ctx_dispatch (source=0x55d70e098550, callback=0x0, user_data=0x0) at util/async.c:261
#25 0x00007f81a7215e04 in g_main_context_dispatch () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
#26 0x000055d70ce74455 in glib_pollfds_poll () at util/main-loop.c:214
#27 0x000055d70ce7456a in os_host_main_loop_wait (timeout=16061710) at util/main-loop.c:261
#28 0x000055d70ce7463f in main_loop_wait (nonblocking=0) at util/main-loop.c:515
#29 0x000055d70ca8e6a6 in main_loop () at vl.c:1995
#30 0x000055d70ca96815 in main (argc=42, argv=0x7fffdaaeb888, envp=0x7fffdaaeb9e0) at vl.c:4897

Tags: cirrus crash
Revision history for this message
Liang Dai (liangdai) wrote :
Liang Dai (liangdai)
description: updated
Revision history for this message
Gerd Hoffmann (kraxel-redhat) wrote : Re: [Qemu-devel] [Bug 1729501] [NEW] qemu crashes with assertion error `off_cur_end >= off_cur' failed

  Hi,

> > -device cirrus-vga,vgamem_mb=8

Don't do that. cirrus has 4 MB video memory, like physical hardware
has. And you can't change that (i.e. the guest wouldn't be able to use
it if you assign more). The switch exists for live migration
compatibility only, because older qemu versions erroneously assigned
more than 4 MB to cirrus.

I expect you can't trigger the assert any more if you remove
"vgamem_mb=8".

cheers,
  Gerd

Revision history for this message
Liang Dai (liangdai) wrote :

Hi Gerd,

Xen toolstack uses 8 MB by default, see:
https://github.com/xen-project/xen/blob/staging/tools/libxl/libxl_create.c#L292

I have now changed it to 4 MB. QEMU command line:
/mnt/bin/bin/qemu-system-i386 -xen-domid 38 -chardev socket,id=libxl-cmd,path=/var/run/xen/qmp-libxl-38,server,nowait -no-shutdown -mon chardev=libxl-cmd,mode=control -chardev socket,id=libxenstat-cmd,path=/var/run/xen/qmp-libxenstat-38,server,nowait -mon chardev=libxenstat-cmd,mode=control -nodefaults -no-user-config -name ubu_hvm -vnc 0.0.0.0:1,to=99 -display none -serial pty -device cirrus-vga,vgamem_mb=4 -boot order=c -smp 2,maxcpus=2 -device rtl8139,id=nic0,netdev=net0,mac=00:16:3e:74:34:32 -netdev type=tap,id=net0,ifname=vif38.0-emu,script=no,downscript=no -device rtl8139,id=nic1,netdev=net1,mac=00:16:3e:5f:48:e4 -netdev type=tap,id=net1,ifname=vif38.1-emu,script=no,downscript=no -machine xenfv -m 1508 -drive file=/mnt/10G.hdd,if=ide,index=0,media=disk,format=raw,cache=writeback

I ran the fuzzer program again and got two segfaults:

(gdb) bt
#0 0x000055709265e647 in vga_draw_text (s=0x5570949744b0, full_update=1) at /mnt/qemu/hw/display/vga.c:1283
#1 0x000055709265fd04 in vga_update_display (opaque=0x5570949744b0) at /mnt/qemu/hw/display/vga.c:1766
#2 0x0000557092a13756 in graphic_hw_update (con=0x557094a7bb20) at ui/console.c:263
#3 0x0000557092a27f8e in vnc_refresh (dcl=0x7fc1f688b070) at ui/vnc.c:2855
#4 0x0000557092a1774e in dpy_refresh (s=0x557094a74c60) at ui/console.c:1594
#5 0x0000557092a1344c in gui_update (opaque=0x557094a74c60) at ui/console.c:201
#6 0x0000557092b7282a in timerlist_run_timers (timer_list=0x557093bd1800) at util/qemu-timer.c:536
#7 0x0000557092b72895 in qemu_clock_run_timers (type=QEMU_CLOCK_REALTIME) at util/qemu-timer.c:547
#8 0x0000557092b72d96 in qemu_clock_run_all_timers () at util/qemu-timer.c:662
#9 0x0000557092b73666 in main_loop_wait (nonblocking=0) at util/main-loop.c:521
#10 0x000055709278d6a6 in main_loop () at vl.c:1995
#11 0x0000557092795815 in main (argc=42, argv=0x7fffd33f1eb8, envp=0x7fffd33f2010) at vl.c:4897

(gdb) bt
#0 __memcpy_sse2_unaligned () at ../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S:36
#1 0x000055bed7158e30 in vnc_refresh_server_surface (vd=0x7fb9acfc4010) at ui/vnc.c:2827
#2 0x000055bed7158fc4 in vnc_refresh (dcl=0x7fb9acfc4070) at ui/vnc.c:2862
#3 0x000055bed714874e in dpy_refresh (s=0x55bed95d5c60) at ui/console.c:1594
#4 0x000055bed714444c in gui_update (opaque=0x55bed95d5c60) at ui/console.c:201
#5 0x000055bed72a382a in timerlist_run_timers (timer_list=0x55bed8732800) at util/qemu-timer.c:536
#6 0x000055bed72a3895 in qemu_clock_run_timers (type=QEMU_CLOCK_REALTIME) at util/qemu-timer.c:547
#7 0x000055bed72a3d96 in qemu_clock_run_all_timers () at util/qemu-timer.c:662
#8 0x000055bed72a4666 in main_loop_wait (nonblocking=0) at util/main-loop.c:521
#9 0x000055bed6ebe6a6 in main_loop () at vl.c:1995
#10 0x000055bed6ec6815 in main (argc=42, argv=0x7ffd8134a6c8, envp=0x7ffd8134a820) at vl.c:4897

Attached is the full gdb backtrace.

Revision history for this message
Liang Dai (liangdai) wrote :

Hi Gerd,

Would you please take a look at this patch? Testing shows that it prevents these crashes. I'm not an expert; I'm sharing it just to give you more information.

I cannot publish the fuzzer program, but if you need to gather more diagnostic information or try test patches, I'm happy to help.

diff --git a/hw/display/vga.c b/hw/display/vga.c
index 1d19f6b..ab6cd47 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -1240,6 +1240,24 @@ static void vga_draw_text(VGACommonState *s, int full_update)
     palette = s->last_palette;
     x_incr = cw * surface_bytes_per_pixel(surface);

+ /*
+ * In theory, line_offset = width * 4, if line_offset is
+ * less than width * 4, then afterwards, memory operation
+ * on server surface might overflow
+ */
+ if (s->line_offset < 4 * width) {
+ s->line_offset = 4 * width;
+ line_offset = s->line_offset;
+ }
+
+ /*
+ * The above if statement might introduce overflow since it
+ * might increase s->line_offset
+ */
+ if (s->start_addr * 4 + line_offset * height >= s->vram_size) {
+ height = (s->vram_size - 1 - s->start_addr * 4) / line_offset;
+ }
+
     if (full_update) {
         s->full_update_text = 1;
     }
@@ -1464,7 +1482,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
 {
     DisplaySurface *surface = qemu_console_surface(s->con);
     int y1, y, update, linesize, y_start, double_scan, mask, depth;
- int width, height, shift_control, bwidth, bits;
+ int width, height, shift_control, bwidth, bits, bpp;
     ram_addr_t page0, page1, region_start, region_end;
     DirtyBitmapSnapshot *snap = NULL;
     int disp_width, multi_scan, multi_run;
@@ -1522,6 +1540,13 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     }

     depth = s->get_bpp(s);
+ bpp = depth / 8;
+ if (s->line_offset < disp_width * bpp) {
+ s->line_offset = disp_width * bpp;
+ }
+ if (s->start_addr * 4 + s->line_offset * height >= s->vram_size) {
+ height = (s->vram_size - 1 - s->start_addr * 4) / s->line_offset;
+ }

     /*
      * Check whether we can share the surface with the backend
@@ -1717,7 +1742,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
 static void vga_draw_blank(VGACommonState *s, int full_update)
 {
     DisplaySurface *surface = qemu_console_surface(s->con);
- int i, w;
+ int i, h, w;
     uint8_t *d;

     if (!full_update)
@@ -1727,12 +1752,16 @@ static void vga_draw_blank(VGACommonState *s, int full_update)

     w = s->last_scr_width * surface_bytes_per_pixel(surface);
     d = surface_data(surface);
+ if (w > surface_stride(surface)) {
+ w = surface_stride(surface);
+ }
+ h = MIN(s->last_scr_height, surface_height(surface));
     for(i = 0; i < s->last_scr_height; i++) {
         memset(d, 0, w);
         d += surface_stride(surface);
     }
     dpy_gfx_update(s->con, 0, 0,
- s->last_scr_width, s->last_scr_height);
+ w, h);
 }

 #define GMODE_TEXT 0

Revision history for this message
Liang Dai (liangdai) wrote :

Hi Gerd,

Any chance to have a look?

This issue can still be reproduced with the latest code.
(commit 281f327487c9c9b1599f93c589a408bbf4a651b8)

Please check the attachment for full gdb backtrace.

Revision history for this message
Liang Dai (liangdai) wrote :
Changed in qemu:
status: New → Fix Committed
Revision history for this message
Thomas Huth (th-huth) wrote :
Changed in qemu:
status: Fix Committed → Fix Released
To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.