/*
* Written by J.T. Conklin <jtc@acorntoolworks.com>
* Public domain.
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS)
RCSID("$NetBSD: memchr.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif
/*
* memchr: find the first occurrence of a byte value in a memory region
* (the assembly counterpart of C memchr(s, c, n)).
*
* Arguments are read from the stack (offsets are after the pushl below):
*    8(%esp)  start address           -> %eax
*   12(%esp)  byte to search for      -> %ecx (zero-extended)
*   16(%esp)  number of bytes to scan -> %esi
*
* On exit %eax holds the address of the first matching byte, or 0 if
* none of the bytes scanned matched. %esi is saved and restored;
* %ecx, %edx and the flags are modified.
*
* Strategy: compare byte by byte until the pointer is 4-byte aligned,
* then test one 32-bit word per iteration, then finish the last 0-3
* bytes byte by byte.
*/
ENTRY(memchr)
/* %esi is used as the remaining-byte counter; keep the caller's value */
pushl %esi
movl 8(%esp),%eax
movzbl 12(%esp),%ecx
movl 16(%esp),%esi
/*
* Align to word boundary.
* Consider unrolling loop?
*
* Invariant inside .Lalign: %esi (bytes remaining) is non-zero.
*/
testl %esi,%esi /* nbytes == 0? */
je .Lzero
.Lalign:
testb $3,%al /* low two address bits clear => 4-byte aligned */
je .Lword_aligned
cmpb (%eax),%cl
je .Ldone /* match: %eax already points at it */
incl %eax
decl %esi
jnz .Lalign
jmp .Lzero /* ran out of bytes before reaching alignment */
.Lword_aligned:
/*
* copy char to all bytes in word
* (%ecx = 0x000000cc -> 0x0000cccc -> 0xcccccccc)
*/
movb %cl,%ch
movl %ecx,%edx
sall $16,%ecx
orl %edx,%ecx
_ALIGN_TEXT
.Lloop:
/*
* Word loop. Each pass consumes 4 bytes, so it only runs while at
* least 4 bytes remain; otherwise the tail is handled at .Lbyte.
*
* After the xor, a byte of %edx is zero exactly where the memory
* byte equals ch. Subtracting 0x01010101 turns such a zero byte
* into 0xff, so its top bit is caught by the 0x80808080 mask and a
* real match always leaves the loop. Bytes that differ from ch can
* also end up with the top bit set, so leaving the loop does not
* by itself prove a match; the bytes are rechecked below.
*
* Note that %eax and %esi are updated before the test, so on exit
* the word just examined lies at -4(%eax) .. -1(%eax).
*/
cmpl $3,%esi /* fewer than 4 bytes left (nbytes <= 3)? */
jbe .Lbyte
movl (%eax),%edx
addl $4,%eax
xorl %ecx,%edx
subl $4,%esi
subl $0x01010101,%edx
testl $0x80808080,%edx
je .Lloop
/*
* In rare cases, the above loop may exit prematurely. We must
* return to the loop if none of the bytes in the word are
* equal to ch.
*/
/*
* High load-use latency on the Athlon leads to significant
* stalls, so we preload the next char as soon as possible
* instead of using cmp mem8, reg8.
*
* Alignment here avoids a stall on the Athlon, even though
* it's not a branch target.
*
* In each step below the movb sits between the cmpb and the jne;
* this works because movb does not change the flags set by cmpb.
* The subl/decl before each jmp rewinds %eax from "one past the
* word" to the address of the byte that matched.
*/
_ALIGN_TEXT
cmpb -4(%eax),%cl /* 1st byte == ch? */
movb -3(%eax),%dl /* preload 2nd byte */
jne 1f
subl $4,%eax
jmp .Ldone
_ALIGN_TEXT
1: cmpb %dl,%cl /* 2nd byte == ch? */
movb -2(%eax),%dl /* preload 3rd byte */
jne 1f
subl $3,%eax
jmp .Ldone
_ALIGN_TEXT
1: cmpb %dl,%cl /* 3rd byte == ch? */
movb -1(%eax),%dl /* preload 4th byte */
jne 1f
subl $2,%eax
jmp .Ldone
_ALIGN_TEXT
1: cmpb %dl,%cl /* 4th byte == ch? */
jne .Lloop /* false positive: resume with the next word */
decl %eax
jmp .Ldone
.Lbyte:
/* Tail: 0 to 3 bytes remain; scan them one at a time. */
testl %esi,%esi
je .Lzero
.Lbyte_loop:
cmpb (%eax),%cl
je .Ldone
incl %eax
decl %esi
jnz .Lbyte_loop
.Lzero:
/* No match within the requested length: result is 0. */
xorl %eax,%eax
.Ldone:
/* Common exit: %eax holds the result; restore the saved %esi. */
popl %esi
ret
END(memchr)