/*
* This m4 code has been taken from The SPARC Architecture Manual Version 8.
*/
/*
* Division/Remainder
*
* Input is:
* dividend -- the thing being divided
* divisor -- how many ways to divide it
* Important parameters:
* N -- how many bits per iteration we try to get
* as our current guess:
* WORDSIZE -- how many bits altogether we're talking about:
* obviously:
* A derived constant:
* TOPBITS -- how many bits are in the top "decade" of a number:
*
* Important variables are:
* Q -- the partial quotient under development -- initially 0
* R -- the remainder so far -- initially == the dividend
* ITER -- number of iterations of the main division loop which will
* be required. Equal to CEIL( lg2(quotient)/4 )
* Note that this is log_base_(2ˆ4) of the quotient.
* V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
* Cost:
* current estimate for non-large dividend is
* CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
* a large dividend is one greater than 2ˆ(31-4 ) and takes a
* different path, as the upper bits of the quotient must be developed
* one bit at a time.
* This uses the m4 and cpp macro preprocessors.
*/
/*
* This is the recursive definition of how we develop quotient digits.
* It takes three important parameters:
* $1 -- the current depth, 1<=$1<=4
* $2 -- the current accumulation of quotient bits
* 4 -- max depth
* We add a new bit to $2 and either recurse or insert the bits in the quotient.
* Dynamic input:
* %o3 -- current remainder
* %o2 -- current quotient
* %o5 -- current comparand
* cc -- set on current value of %o3
* Dynamic output:
* %o3', %o2', %o5', cc'
*/
#include "../assembly.h"
.text
.align 32
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
b divide
mov 0,%g3 ! result always nonnegative
.text
.align 32
DEFINE_COMPILERRT_FUNCTION(__divsi3)
orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
bge divide ! if not, skip this junk
xor %o1,%o0,%g3 ! record sign of result in sign of %g3
tst %o1
bge 2f
tst %o0
! %o1 < 0
bge divide
neg %o1
2:
! %o0 < 0
neg %o0
! FALL THROUGH
divide:
! Compute size of quotient, scale comparand.
orcc %o1,%g0,%o5 ! movcc %o1,%o5
te 2 ! if %o1 = 0
mov %o0,%o3
mov 0,%o2
sethi %hi(1<<(32-4 -1)),%g1
cmp %o3,%g1
blu not_really_big
mov 0,%o4
!
! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
! as our usual 4-at-a-shot divide step will cause overflow and havoc.
! The total number of bits in the result here is 4*%o4+%g2, where
! %g2 <= 4.
! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
! the top decade: so don't even bother to compare to %o3.
1:
cmp %o5,%g1
bgeu 3f
mov 1,%g2
sll %o5,4,%o5
b 1b
inc %o4
! Now compute %g2
2: addcc %o5,%o5,%o5
bcc not_too_big
add %g2,1,%g2
! We're here if the %o1 overflowed when Shifting.
! This means that %o3 has the high-order bit set.
! Restore %o5 and subtract from %o3.
sll %g1,4 ,%g1 ! high order bit
srl %o5,1,%o5 ! rest of %o5
add %o5,%g1,%o5
b do_single_div
dec %g2
not_too_big:
3: cmp %o5,%o3
blu 2b
nop
be do_single_div
nop
! %o5 > %o3: went too far: back up 1 step
! srl %o5,1,%o5
! dec %g2
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! first divide step without thinking. BUT, the others are conditional,
! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
do_single_div:
deccc %g2
bl end_regular_divide
nop
sub %o3,%o5,%o3
mov 1,%o2
b,a end_single_divloop
! EMPTY
single_divloop:
sll %o2,1,%o2
bl 1f
srl %o5,1,%o5
! %o3 >= 0
sub %o3,%o5,%o3
b 2f
inc %o2
1: ! %o3 < 0
add %o3,%o5,%o3
dec %o2
2:
end_single_divloop:
deccc %g2
bge single_divloop
tst %o3
b,a end_regular_divide
! EMPTY
not_really_big:
1:
sll %o5,4,%o5
cmp %o5,%o3
bleu 1b
inccc %o4
be got_result
dec %o4
do_regular_divide:
! Do the main division iteration
tst %o3
! Fall through into divide loop
divloop:
sll %o2,4,%o2
!depth 1, accumulated bits 0
bl L.1.16
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
!depth 2, accumulated bits 1
bl L.2.17
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
!depth 3, accumulated bits 3
bl L.3.19
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
!depth 4, accumulated bits 7
bl L.4.23
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
L.4.23:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
L.3.19:
! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits 5
bl L.4.21
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
L.4.21:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
L.2.17:
! remainder is negative
addcc %o3,%o5,%o3
!depth 3, accumulated bits 1
bl L.3.17
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
!depth 4, accumulated bits 3
bl L.4.19
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
L.4.19:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
L.3.17:
! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits 1
bl L.4.17
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
L.4.17:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
L.1.16:
! remainder is negative
addcc %o3,%o5,%o3
!depth 2, accumulated bits -1
bl L.2.15
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
!depth 3, accumulated bits -1
bl L.3.15
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
!depth 4, accumulated bits -1
bl L.4.15
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
L.4.15:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
L.3.15:
! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits -3
bl L.4.13
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
L.4.13:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
L.2.15:
! remainder is negative
addcc %o3,%o5,%o3
!depth 3, accumulated bits -3
bl L.3.13
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
!depth 4, accumulated bits -5
bl L.4.11
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
L.4.11:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
L.3.13:
! remainder is negative
addcc %o3,%o5,%o3
!depth 4, accumulated bits -7
bl L.4.9
srl %o5,1,%o5
! remainder is nonnegative
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
L.4.9:
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
9:
end_regular_divide:
deccc %o4
bge divloop
tst %o3
bl,a got_result
! non-restoring fixup if remainder < 0, otherwise annulled
dec %o2
got_result:
tst %g3
bl,a 1f
! negate for answer < 0, otherwise annulled
neg %o2,%o2 ! %o2 <- -%o2
1:
retl ! leaf-routine return
mov %o2,%o0 ! quotient <- %o2