xorl %r8d, %r8d
/* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */
subl $(VEC_SIZE * 2), %ecx
jle 1f
/* Skip ECX bytes. */
shrq %cl, %rdi
/* R8 has number of bytes skipped. */
movl %ecx, %r8d
1:
/* Before jumping back to the loop, set ESI to the number of
VEC_SIZE * 4 blocks before page crossing. */
movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi
testq %rdi, %rdi
je L(back_to_loop)
/* When this branch is taken, there are (VEC_SIZE * 4) + %r10 matching
   bytes starting at %rax, which may be >= the maximum offset. */
/* Check the bytes at offsets [VEC_SIZE * 2, VEC_SIZE * 4) past %r10 in
   the two buffers addressed by %rax and %rdx.

   In:    %rax, %rdx = base pointers of the two buffers
          %r10       = byte offset applied to both bases
          %ecx       = byte count; anything above VEC_SIZE * 2 is the
                       number of leading bytes of this half to skip
          %ymm7      = value the per-byte results are compared against
                       (presumably all zeros -- TODO confirm where it
                       is initialised)
   Out:   %rdi       = 64-bit mask built from the two vector results,
                       shifted right by the number of skipped bytes
          %r8d       = number of bytes skipped (0 if none)
          %esi       = PAGE_SIZE / (VEC_SIZE * 4) - 1
   Clobb: %ecx, %rsi, %rdi, %r8, %ymm2, %ymm3, %ymm5, %ymm6, flags

   NOTE(review): VPCMPEQ and VPMINU are macros defined outside this
   view.  If they are the element-wise compare-equal and unsigned-min
   instructions and %ymm7 is zero, a set bit in %rdi marks a position
   that either differs between the buffers or holds a null -- confirm
   against their definitions. */
L(loop_cross_page_2_vec):
	/* The first VEC_SIZE * 2 bytes match or are ignored. */
	vmovdqu	(VEC_SIZE * 2)(%rax, %r10), %ymm2
	vmovdqu	(VEC_SIZE * 3)(%rax, %r10), %ymm3
	/* Third vector: compare with the other buffer, fold the loaded
	   data into the result with VPMINU, then compare against
	   %ymm7. */
	VPCMPEQ	(VEC_SIZE * 2)(%rdx, %r10), %ymm2, %ymm5
	VPMINU	%ymm2, %ymm5, %ymm5
	/* Fourth vector: same sequence, interleaved with the third. */
	VPCMPEQ	(VEC_SIZE * 3)(%rdx, %r10), %ymm3, %ymm6
	VPCMPEQ	%ymm7, %ymm5, %ymm5
	VPMINU	%ymm3, %ymm6, %ymm6
	VPCMPEQ	%ymm7, %ymm6, %ymm6

	/* One mask bit per byte of each vector; the 32-bit destination
	   write zero-extends into the full 64-bit register. */
	vpmovmskb %ymm5, %edi
	vpmovmskb %ymm6, %esi

	/* Place the fourth vector's mask in bits 63:32.  The two halves
	   occupy disjoint bits, so the XOR below acts as an OR. */
	salq	$32, %rsi
	xorq	%rsi, %rdi

	/* Default: no bytes skipped. */
	xorl	%r8d, %r8d
	/* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */
	subl	$(VEC_SIZE * 2), %ecx
	/* Signed test: ECX <= VEC_SIZE * 2 means nothing to skip. */
	jle	1f
	/* Skip ECX bytes. */
	shrq	%cl, %rdi
	/* R8 has number of bytes skipped. */
	movl	%ecx, %r8d
1:
	/* Before jumping back to the loop, set ESI to the number of
	   VEC_SIZE * 4 blocks before page crossing. */
	movl	$(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi

	/* No bit set in the combined mask: go back to the loop. */
	testq	%rdi, %rdi
	je	L(back_to_loop)
/* When this branch is taken, there are (VEC_SIZE * 4) + %r10 matching
   bytes starting at %rax, which may be >= the maximum offset. */