Comment 0 for bug 851258

Revision history for this message
Ramana Radhakrishnan (ramana) wrote :

Consider the following testcase.

/* Local definitions of fixed-width type names; f2 below uses only
   uint32_t and uint16_t. */
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
typedef unsigned long uintptr_t;
typedef unsigned short uint16_t;
/*
 * Conditionally copy data from s to d, selected by the low two bits of flags:
 *   flags & 1: one char is loaded from *s and later stored to *d;
 *   flags & 2: one uint16_t is loaded through s and later stored through d.
 * Both loads are performed before either store, so each flag bit is tested
 * twice (once for the load, once for the matching store).  tmp0 and tmp1 are
 * only read under the same flag test that assigned them.
 * The function returns nothing; its only outputs are the writes through d.
 */
void f2(char *d, char const *s, int flags)
{
  uint32_t tmp0, tmp1;

  /* Load phase: byte first, advancing s past it. */
  if (flags & 1)
    tmp0 = *s++;

  if (flags & 2)
    {
      /* Reinterpret the (possibly already advanced) s as a uint16_t pointer.
         NOTE(review): assumes the address is suitably aligned for uint16_t
         access -- confirm for the target. */
      uint16_t *ss = (void *)s;
      tmp1 = *ss++;
      /* Write the advanced pointer back; s is not read again after this. */
      s = (void *)ss;
    }

  /* Store phase: mirrors the load phase, re-testing the same flag bits. */
  if (flags & 1)
    *d++ = tmp0;

  if (flags & 2)
    {
      /* Same pointer reinterpretation as above, now for the destination. */
      uint16_t *dd = (void *)d;
      *dd++ = tmp1;
      /* Write the advanced pointer back; d is not read again after this. */
      d = (void *)dd;
    }
}

GCC currently generates

 push {r4, r5}
 ands r5, r2, #1
 it ne
 ldrbne r4, [r1], #1 @ zero_extendqisi2
 ands r2, r2, #2
 it ne
 ldrhne r3, [r1, #0]
 cbz r5, .L4
 strb r4, [r0], #1
.L4:
 cbz r2, .L1
 strh r3, [r0, #0] @ movhi
.L1:
 pop {r4, r5}
 bx lr

This could very well be the following instead:

 tst r2, #1
        it ne
 ldrneb ip, [r1], #1 @ zero_extendqisi2
 tst r2, #2
        it ne
 ldrneh r3, [r1, #0]
 tst r2, #1
        it ne
 strneb ip, [r0], #1
 tst r2, #2
        it ne
 strneh r3, [r0, #0] @ movhi
 bx lr

This is also a problem on other ports, given that the `a & b` test operation is CSE'd and its result is then compared against 0.

cheers
Ramana