/*
* Written by J.T. Conklin <jtc@acorntoolworks.com>
* Public domain.
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS)
RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif
/*
* strcat: append the NUL-terminated string at the second stack argument
* to the end of the NUL-terminated string at the first stack argument,
* and return the first argument.
*
* Stack arguments (offsets as seen after the pushl %ebx below):
*   8(%esp)   destination pointer
*   12(%esp)  source pointer
*
* Register roles:
*   %ecx  cursor into the destination (scan position, then write position)
*   %eax  cursor into the source; reloaded with the destination pointer
*         just before returning
*   %ebx  current byte/word being examined or copied (saved and restored)
*   %edx  scratch for the zero-byte test (overwritten, not restored)
*
* ENTRY, END and _ALIGN_TEXT are macros that come from <machine/asm.h>;
* presumably they emit the symbol/size directives and code alignment
* padding -- confirm against that header.
*/
ENTRY(strcat)
pushl %ebx
movl 8(%esp),%ecx
movl 12(%esp),%eax
/*
* Phase 1: find the terminating NUL of the destination.
*
* First step one byte at a time until %ecx is on a 4-byte boundary,
* so that the loop below can use aligned word loads. If the NUL is
* found while aligning, go straight to the copy phase with %ecx
* pointing at it.
* Consider unrolling loop?
*/
.Lscan:
.Lscan_align:
testb $3,%cl
je .Lscan_aligned
cmpb $0,(%ecx)
je .Lcopy
incl %ecx
jmp .Lscan_align
_ALIGN_TEXT
/*
* Word-at-a-time scan. For a word w, (w - 0x01010101) & 0x80808080 is
* non-zero whenever some byte of w is zero, so a zero result means the
* word certainly contains no NUL and the loop can continue. Note that
* %ecx has already been advanced past the word when the loop exits.
*/
.Lscan_aligned:
.Lscan_loop:
movl (%ecx),%ebx
addl $4,%ecx
leal -0x01010101(%ebx),%edx
testl $0x80808080,%edx
je .Lscan_loop
/*
* The test above can also fire when no byte is zero (a byte of 0x81
* or greater sets the same bit), so the loop may exit prematurely. We
* must return to the loop if none of the bytes in the word equal 0.
*/
/*
* The optimal code for determining whether each byte is zero
* differs by processor. This space-optimized code should be
* acceptable on all, especially since we don't expect it to
* be run frequently.
*
* Each case below rewinds %ecx (already 4 past the start of the word)
* so that it points at the NUL byte that was found.
*/
testb %bl,%bl /* 1st byte == 0? */
jne 1f
subl $4,%ecx
jmp .Lcopy
1: testb %bh,%bh /* 2nd byte == 0? */
jne 1f
subl $3,%ecx
jmp .Lcopy
/* Shift the upper half down so bytes 3 and 4 can be tested as %bl/%bh. */
1: shrl $16,%ebx
testb %bl,%bl /* 3rd byte == 0? */
jne 1f
subl $2,%ecx
jmp .Lcopy
1: testb %bh,%bh /* 4th byte == 0? */
jne .Lscan_loop
subl $1,%ecx
/*
* Phase 2: copy the source, including its NUL, starting at the
* destination's NUL (%ecx).
*
* First copy one byte at a time until the source pointer %eax is on a
* 4-byte boundary. Only the source is aligned; the destination may
* remain unaligned for the word stores below. If the source's NUL is
* copied during this step, we are finished.
* Consider unrolling loop?
*/
.Lcopy:
.Lcopy_align:
testl $3,%eax
je .Lcopy_aligned
movb (%eax),%bl
incl %eax
movb %bl,(%ecx)
incl %ecx
testb %bl,%bl
jne .Lcopy_align
jmp .Ldone
_ALIGN_TEXT
/*
* Word-at-a-time copy. The loop is entered at .Lcopy_aligned: a word is
* loaded and tested first, and only stored (at .Lcopy_loop) once the
* zero-byte test shows it contains no NUL.
*/
.Lcopy_loop:
movl %ebx,(%ecx)
addl $4,%ecx
.Lcopy_aligned:
movl (%eax),%ebx
addl $4,%eax
leal -0x01010101(%ebx),%edx
testl $0x80808080,%edx
je .Lcopy_loop
/*
* The word may contain a NUL, or the test may have fired spuriously
* (see the scan loop). Store the four bytes individually, stopping
* right after a NUL has been stored. If none of the bytes in the word
* equal 0, all four have been copied and %ecx has advanced by 4, so
* we return to the word loop.
*/
movb %bl,(%ecx)
incl %ecx
testb %bl,%bl
je .Ldone
movb %bh,(%ecx)
incl %ecx
testb %bh,%bh
je .Ldone
/* Bring bytes 3 and 4 down into %bl/%bh. */
shrl $16,%ebx
movb %bl,(%ecx)
incl %ecx
testb %bl,%bl
je .Ldone
movb %bh,(%ecx)
incl %ecx
testb %bh,%bh
jne .Lcopy_aligned
/*
* Return the original destination pointer, re-read from the stack
* argument (still at 8(%esp) because %ebx has not been popped yet),
* then restore %ebx.
*/
.Ldone:
movl 8(%esp),%eax
popl %ebx
ret
END(strcat)