dnl ARM64 mpn_rshift.
dnl Copyright 2013, 2014 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
C Cortex-A53 ?
C Cortex-A57 ?
C Move the m4 comment delimiter to a string that never occurs in this file.
C The immediates used below start with a hash sign and share their lines
C with L() labels and C comments, which must still be processed by m4.
changecom(@&*$)

C Incoming arguments, in argument-register order.
define(`rp_arg',	`x0')
define(`up',		`x1')
define(`n',		`x2')
define(`cnt',		`x3')

C Working registers: rp receives a copy of rp_arg on entry, and tnc
C receives the negated shift count.
define(`tnc',		`x8')
define(`rp',		`x16')

ASM_START()
C mpn_rshift -- shift the n-limb number at up right by cnt bits and store
C the n result limbs at rp, working from the least significant limb upward.
C
C In:	x0 (rp_arg) = destination pointer
C	x1 (up)     = source pointer
C	x2 (n)      = number of limbs
C	x3 (cnt)    = shift count in bits
C Out:	x0 = up[0] << tnc, that is, the bits shifted out at the low end,
C	positioned at the top of the returned limb.
C
C Each result limb is  (up[i] >> cnt) | (up[i+1] << tnc)  and the top
C result limb is  up[n-1] >> cnt.
C
C n must be at least 1.  n = 0 is not handled: it takes the b00 path,
C which loads and stores limbs unconditionally.
C NOTE(review): cnt is presumably restricted to 1..63 by the caller.  With
C cnt = 0 the shift by tnc is a shift by 0 and the OR yields a wrong result.
C
C Register use:
C   x4-x7		source limbs
C   x10, x11		limb << tnc, the part that lands at the top of a result
C   x9, x12, x13	limb >> cnt, the part that forms the low bits of a result
C
C Only caller-saved scratch registers are used and there is no stack frame.
C x18 is deliberately avoided: it is the platform register and is reserved
C on some operating systems, so x12 carries the value instead.
C
C The main loop handles four limbs per iteration and is entered at lo2 or
C lo0 depending on n mod 4.  The feed-in code below biases up and rp so
C that the fixed offsets used inside the loop address the right limbs.
PROLOGUE(mpn_rshift)
	mov	rp, rp_arg		C free x0 for the return value
	sub	tnc, xzr, cnt		C tnc = -cnt, register shifts use the count mod 64
	tbz	n, #0, L(bx0)		C bit 0 clear: n is even

C n is odd.
L(bx1):	ldr	x4, [up,#0]
	tbnz	n, #1, L(b11)

C n mod 4 = 1.
L(b01):	lsl	x0, x4, tnc		C return value
	lsr	x12, x4, cnt
	sub	n, n, #1
	cbnz	n, L(gt1)
	str	x12, [rp,#0]		C n was 1: single limb, done
	ret
L(gt1):	ldp	x5, x4, [up,#8]
	sub	up, up, #8
	sub	rp, rp, #32
	b	L(lo2)

C n mod 4 = 3.
L(b11):	lsl	x0, x4, tnc		C return value
	lsr	x9, x4, cnt
	ldp	x7, x6, [up,#8]
	add	n, n, #1		C round the counter up to a multiple of 4
	sub	up, up, #24
	sub	rp, rp, #48
	b	L(lo0)

C n is even.
L(bx0):	ldp	x5, x4, [up,#0]
	tbz	n, #1, L(b00)

C n mod 4 = 2.
L(b10):	lsl	x0, x5, tnc		C return value
	lsr	x13, x5, cnt
	lsl	x10, x4, tnc
	lsr	x12, x4, cnt
	sub	n, n, #2
	cbnz	n, L(gt2)
	orr	x10, x10, x13
	stp	x10, x12, [rp,#0]	C n was 2: both limbs, done
	ret
L(gt2):	ldp	x5, x4, [up,#16]
	orr	x10, x10, x13
	str	x10, [rp,#0]
	sub	rp, rp, #24
	b	L(lo2)

C n mod 4 = 0.
L(b00):	lsl	x0, x5, tnc		C return value
	lsr	x13, x5, cnt
	lsl	x10, x4, tnc
	lsr	x9, x4, cnt
	ldp	x7, x6, [up,#16]
	orr	x10, x10, x13
	str	x10, [rp,#0]
	sub	up, up, #16
	sub	rp, rp, #40
	b	L(lo0)

	ALIGN(16)
L(top):	ldp	x5, x4, [up,#48]
	add	rp, rp, #32		C integrate with stp?
	add	up, up, #32		C integrate with ldp?
	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x11, x10, [rp,#16]
L(lo2):	lsl	x11, x5, tnc
	lsr	x13, x5, cnt
	lsl	x10, x4, tnc
	lsr	x9, x4, cnt
	ldp	x7, x6, [up,#32]
	orr	x11, x11, x12		C x12 = low part carried from the previous limb
	orr	x10, x10, x13
	stp	x11, x10, [rp,#32]
L(lo0):	sub	n, n, #4		C four limbs consumed per trip
	lsl	x11, x7, tnc
	lsr	x13, x7, cnt
	lsl	x10, x6, tnc
	lsr	x12, x6, cnt
	cbnz	n, L(top)

C Wind down: store the last two combined limbs and then the top limb, which
C has nothing above it to OR in.
L(end):	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x11, x10, [rp,#48]
	str	x12, [rp,#64]
	ret
EPILOGUE()