Comment 7 for bug 852760

Revision history for this message
In , Jseward (jseward) wrote :

This seems to me like a bug in gcc. From the following analysis
(start reading at 0x400a38), the value loaded from memory into the
high half of xmm12 is never used -- the only later read of xmm12
(the unpcklpd at 0x400a4d) uses just the low half, and xmm12 is then
completely overwritten, either in the post-loop block (0x400a7d), or
by the first instruction of the next iteration (0x4009d8).

==12860== Invalid read of size 8
==12860== at 0x400A38: integrate_gf_npbc_

  # def xmm12 (low half loaded, high half zeroed)
  4009d8: f2 44 0f 10 24 16 movsd (%rsi,%rdx,1),%xmm12
  4009de: 41 83 c6 01 add $0x1,%r14d
  4009e2: f2 0f 10 31 movsd (%rcx),%xmm6
  4009e6: 66 44 0f 16 64 16 08 movhpd 0x8(%rsi,%rdx,1),%xmm12
  4009ed: f2 41 0f 10 04 17 movsd (%r15,%rdx,1),%xmm0
  4009f3: 66 0f 16 71 08 movhpd 0x8(%rcx),%xmm6
  4009f8: 66 41 0f 28 dc movapd %xmm12,%xmm3
  4009fd: f2 44 0f 10 61 10 movsd 0x10(%rcx),%xmm12
  400a03: 66 0f 28 ce movapd %xmm6,%xmm1
  400a07: 66 41 0f 16 44 17 08 movhpd 0x8(%r15,%rdx,1),%xmm0
  400a0e: 66 44 0f 16 61 18 movhpd 0x18(%rcx),%xmm12
  400a14: f2 0f 10 33 movsd (%rbx),%xmm6
  400a18: 66 0f 28 d0 movapd %xmm0,%xmm2
  400a1c: 48 83 c2 10 add $0x10,%rdx
  400a20: 66 41 0f 14 cc unpcklpd %xmm12,%xmm1
  400a25: 66 0f 16 73 08 movhpd 0x8(%rbx),%xmm6
  400a2a: f2 44 0f 10 63 10 movsd 0x10(%rbx),%xmm12
  400a30: 48 83 c1 20 add $0x20,%rcx
  400a34: 66 0f 28 c6 movapd %xmm6,%xmm0

  # loads the high half of xmm12 from 0x18(%rbx); this is the 8-byte
  # invalid read reported above. The low half is unchanged.
  400a38: 66 44 0f 16 63 18 movhpd 0x18(%rbx),%xmm12
  400a3e: 66 0f 28 f1 movapd %xmm1,%xmm6
  400a42: 66 0f 59 ca mulpd %xmm2,%xmm1
  400a46: 48 83 c3 20 add $0x20,%rbx
  400a4a: 41 39 ee cmp %ebp,%r14d

  # reads the low half of xmm12 only (the high half loaded at 0x400a38 is ignored)
  400a4d: 66 41 0f 14 c4 unpcklpd %xmm12,%xmm0
  400a52: 66 0f 59 f3 mulpd %xmm3,%xmm6
  400a56: 66 0f 59 d8 mulpd %xmm0,%xmm3
  400a5a: 66 0f 58 f9 addpd %xmm1,%xmm7
  400a5e: 66 0f 59 c2 mulpd %xmm2,%xmm0
  400a62: 66 44 0f 58 de addpd %xmm6,%xmm11
  400a67: 66 0f 58 eb addpd %xmm3,%xmm5
  400a6b: 66 0f 58 e0 addpd %xmm0,%xmm4
  400a6f: 0f 82 63 ff ff ff jb 4009d8 # (loop head)

  400a75: 66 0f 28 c4 movapd %xmm4,%xmm0
  400a79: 8b 54 24 a8 mov -0x58(%rsp),%edx

  # def xmm12 (overwrite both halves)
  400a7d: 66 44 0f 28 e7 movapd %xmm7,%xmm12