#include "sparc_arch.h"
#ifdef __arch64__
.register %g2,#scratch
.register %g3,#scratch
#endif
.text
.align 32
! des_t4_key_expand -- expand one 8-byte DES key into a round-key schedule.
!
! In:   %o0 = address of the 8-byte key; any byte alignment is accepted
!       %o1 = address of the output schedule; written with 8-byte std
!             stores, so presumably 8-byte aligned -- TODO confirm callers
! Out:  sixteen 8-byte entries stored at [%o1 + 0x00] .. [%o1 + 0x78]
! Uses: %f0-%f30, integer condition codes, GSR alignment field
!
! Leaf routine (retl, no save/restore).  VIS and T4 crypto opcodes are
! emitted as raw .word values; the intended mnemonic is given in the
! trailing comment of each such line.
.globl des_t4_key_expand
.type des_t4_key_expand,#function
des_t4_key_expand:
! Test the low three address bits, then let alignaddr round %o0 down to an
! 8-byte boundary and record the byte offset for the later faligndata.
andcc %o0, 0x7, %g0
.word 0x91b20300 !alignaddr %o0,%g0,%o0
bz,pt %icc, 1f
! Delay slot: runs on both paths and loads the first aligned doubleword.
ldd [%o0 + 0x00], %f0
! Misaligned key only: fetch the following doubleword and extract the
! eight key bytes that straddle the two.
ldd [%o0 + 0x08], %f2
.word 0x81b00902 !faligndata %f0,%f2,%f0
! Each des_kexpand derives a schedule entry from an earlier one.  Stores of
! finished entries are interleaved with the remaining expansions, so the
! store order below is not the same as the offset order.
1: .word 0x81b026c0 !des_kexpand %f0,0,%f0,
.word 0x85b026c1 !des_kexpand %f0,1,%f2,
std %f0, [%o1 + 0x00]
.word 0x8db0a6c3 !des_kexpand %f2,3,%f6,
std %f2, [%o1 + 0x08]
.word 0x89b0a6c2 !des_kexpand %f2,2,%f4,
.word 0x95b1a6c3 !des_kexpand %f6,3,%f10,
std %f6, [%o1 + 0x18]
.word 0x91b1a6c2 !des_kexpand %f6,2,%f8,
std %f4, [%o1 + 0x10]
.word 0x9db2a6c3 !des_kexpand %f10,3,%f14,
std %f10, [%o1 + 0x28]
.word 0x99b2a6c2 !des_kexpand %f10,2,%f12,
std %f8, [%o1 + 0x20]
.word 0xa1b3a6c1 !des_kexpand %f14,1,%f16,
std %f14, [%o1 + 0x38]
.word 0xa9b426c3 !des_kexpand %f16,3,%f20,
std %f12, [%o1 + 0x30]
.word 0xa5b426c2 !des_kexpand %f16,2,%f18,
std %f16, [%o1 + 0x40]
.word 0xb1b526c3 !des_kexpand %f20,3,%f24,
std %f20, [%o1 + 0x50]
.word 0xadb526c2 !des_kexpand %f20,2,%f22,
std %f18, [%o1 + 0x48]
.word 0xb9b626c3 !des_kexpand %f24,3,%f28,
std %f24, [%o1 + 0x60]
.word 0xb5b626c2 !des_kexpand %f24,2,%f26,
std %f22, [%o1 + 0x58]
.word 0xbdb726c1 !des_kexpand %f28,1,%f30,
std %f28, [%o1 + 0x70]
std %f26, [%o1 + 0x68]
retl
! Delay slot: the last schedule entry is stored while returning.
std %f30, [%o1 + 0x78]
.size des_t4_key_expand,.-des_t4_key_expand
! des_t4_cbc_encrypt -- single-DES CBC encryption with T4 DES opcodes.
!
! In:   %o0 = input address (any byte alignment)
!       %o1 = output address (any byte alignment)
!       %o2 = length in bytes; only whole 8-byte blocks are processed, a
!             trailing partial block is ignored, and a length below 8
!             returns without touching memory
!       %o3 = key schedule, sixteen 8-byte entries used in ascending order
!       %o4 = 8-byte IV, read on entry and replaced by the last
!             ciphertext block on exit
! Uses: %g1 = input misalignment in bits, %g2 = minus that value (srlx
!             only looks at the low six bits, giving 64 - %g1),
!       %g3 = byte mask for partial stores, 0 when the output is aligned,
!       %g4/%g5 = input staging, %f0-%f34, GSR alignment field
!
! Leaf routine (retl, no save/restore).  VIS and T4 crypto opcodes are
! emitted as raw .word values with the mnemonic in the trailing comment.
! The label .Lcbc_abort defined here is also the early exit of the other
! CBC entry points in this file.
.globl des_t4_cbc_encrypt
.align 32
des_t4_cbc_encrypt:
! Convert the byte count to a block count BEFORE testing for zero.  Testing
! the raw byte count let lengths 1..7 through with a block count of 0,
! which the loop then decremented to -1 and never terminated on.
srln %o2, 0, %o2 ! needed on v8+, "nop" on v9
srlx %o2, 3, %o2 ! byte count -> whole 8-byte blocks
brz,pn %o2, .Lcbc_abort ! no complete block: nothing to do
nop
ld [%o4 + 0], %f0 ! load ivec
ld [%o4 + 4], %f1
! Input side: round %o0 down to 8 bytes and keep the misalignment as a bit
! shift count in %g1.
and %o0, 7, %g1
andn %o0, 7, %o0
sll %g1, 3, %g1
mov 0xff, %g3
prefetch [%o0], 20
prefetch [%o0 + 63], 20
sub %g0, %g1, %g2
! Output side: alignaddrl rounds %o1 down and programs the GSR alignment
! field; %g3 becomes 0xff >> (out & 7), or 0 when the output is aligned.
and %o1, 7, %g4
.word 0x93b24340 !alignaddrl %o1,%g0,%o1
srl %g3, %g4, %g3
movrz %g4, 0, %g3
prefetch [%o1], 22
ldd [%o3 + 0x00], %f4 ! load key schedule
ldd [%o3 + 0x08], %f6
ldd [%o3 + 0x10], %f8
ldd [%o3 + 0x18], %f10
ldd [%o3 + 0x20], %f12
ldd [%o3 + 0x28], %f14
ldd [%o3 + 0x30], %f16
ldd [%o3 + 0x38], %f18
ldd [%o3 + 0x40], %f20
ldd [%o3 + 0x48], %f22
ldd [%o3 + 0x50], %f24
ldd [%o3 + 0x58], %f26
ldd [%o3 + 0x60], %f28
ldd [%o3 + 0x68], %f30
ldd [%o3 + 0x70], %f32
ldd [%o3 + 0x78], %f34
! Per-block loop.  The unaligned-output path re-enters at loop+4 because it
! has already loaded %g4 itself; the first instruction here must therefore
! stay a single ldx into %g4.
.Ldes_cbc_enc_loop:
ldx [%o0 + 0], %g4
brz,pt %g1, 4f
nop
! Misaligned input: splice the block out of two aligned doublewords.
ldx [%o0 + 8], %g5
sllx %g4, %g1, %g4
srlx %g5, %g2, %g5
or %g5, %g4, %g4
4:
.word 0x85b02304 !movxtod %g4,%f2
prefetch [%o0 + 8+63], 20
add %o0, 8, %o0
.word 0x81b08d80 !fxor %f2,%f0,%f0 ! ^= ivec
prefetch [%o1 + 63], 22
! Initial permutation, eight des_round opcodes each consuming two schedule
! entries, then the inverse permutation.
.word 0x81b02680 !des_ip %f0,%f0,,
.word 0x80c90126 !des_round %f4,%f6,%f0,%f0
.word 0x80ca012a !des_round %f8,%f10,%f0,%f0
.word 0x80cb012e !des_round %f12,%f14,%f0,%f0
.word 0x80cc0132 !des_round %f16,%f18,%f0,%f0
.word 0x80cd0136 !des_round %f20,%f22,%f0,%f0
.word 0x80ce013a !des_round %f24,%f26,%f0,%f0
.word 0x80cf013e !des_round %f28,%f30,%f0,%f0
.word 0x80c84123 !des_round %f32,%f34,%f0,%f0
.word 0x81b026a0 !des_iip %f0,%f0,,
! %f0 is both the ciphertext block and the chaining value for the next one.
brnz,pn %g3, 2f
sub %o2, 1, %o2 ! delay slot: one block consumed on either path
std %f0, [%o1 + 0]
brnz,pt %o2, .Ldes_cbc_enc_loop
add %o1, 8, %o1
st %f0, [%o4 + 0] ! write out ivec
retl
st %f1, [%o4 + 4]
.Lcbc_abort:
retl
nop
.align 16
! Unaligned output.  ASI 0x82 is the non-faulting primary ASI, so reading
! the next input doubleword ahead of the stores is safe even after the last
! block.  ASI 0xc0 selects a byte-masked partial store.
2: ldxa [%o0]0x82, %g4 ! avoid read-after-write hazard
! and ~4x deterioration
! in inp==out case
.word 0x85b00900 !faligndata %f0,%f0,%f2 ! handle unaligned output
! The rotated copy goes to %f2 so that %f0 stays intact for chaining.  The
! first store fills the tail of the current doubleword, the second one the
! head of the next; the mask is complemented in between and restored after.
stda %f2, [%o1 + %g3]0xc0 ! partial store
add %o1, 8, %o1
orn %g0, %g3, %g3
stda %f2, [%o1 + %g3]0xc0 ! partial store
brnz,pt %o2, .Ldes_cbc_enc_loop+4
orn %g0, %g3, %g3
st %f0, [%o4 + 0] ! write out ivec
retl
st %f1, [%o4 + 4]
.type des_t4_cbc_encrypt,#function
.size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
! des_t4_cbc_decrypt -- single-DES CBC decryption with T4 DES opcodes.
!
! In:   %o0 = input address (any byte alignment)
!       %o1 = output address (any byte alignment)
!       %o2 = length in bytes; only whole 8-byte blocks are processed, a
!             trailing partial block is ignored, and a length below 8
!             returns without touching memory
!       %o3 = key schedule, sixteen 8-byte entries used in descending order
!       %o4 = 8-byte IV, read on entry and replaced by the last
!             ciphertext (input) block on exit
! Uses: %g1 = input misalignment in bits, %g2 = minus that value (srlx
!             only looks at the low six bits, giving 64 - %g1),
!       %g3 = byte mask for partial stores, 0 when the output is aligned,
!       %g4/%g5 = input staging, %f0-%f34, GSR alignment field
!
! Leaf routine (retl, no save/restore).  VIS and T4 crypto opcodes are
! emitted as raw .word values with the mnemonic in the trailing comment.
! The early exit .Lcbc_abort is the label defined in des_t4_cbc_encrypt.
.globl des_t4_cbc_decrypt
.align 32
des_t4_cbc_decrypt:
! Convert the byte count to a block count BEFORE testing for zero.  Testing
! the raw byte count let lengths 1..7 through with a block count of 0,
! which the loop then decremented to -1 and never terminated on.
srln %o2, 0, %o2 ! needed on v8+, "nop" on v9
srlx %o2, 3, %o2 ! byte count -> whole 8-byte blocks
brz,pn %o2, .Lcbc_abort ! no complete block: nothing to do
nop
ld [%o4 + 0], %f2 ! load ivec
ld [%o4 + 4], %f3
! Input side: round %o0 down to 8 bytes and keep the misalignment as a bit
! shift count in %g1.
and %o0, 7, %g1
andn %o0, 7, %o0
sll %g1, 3, %g1
mov 0xff, %g3
prefetch [%o0], 20
prefetch [%o0 + 63], 20
sub %g0, %g1, %g2
! Output side: alignaddrl rounds %o1 down and programs the GSR alignment
! field; %g3 becomes 0xff >> (out & 7), or 0 when the output is aligned.
and %o1, 7, %g4
.word 0x93b24340 !alignaddrl %o1,%g0,%o1
srl %g3, %g4, %g3
movrz %g4, 0, %g3
prefetch [%o1], 22
! The schedule is loaded back to front so the same round sequence as the
! encrypt routine performs decryption.
ldd [%o3 + 0x78], %f4 ! load key schedule
ldd [%o3 + 0x70], %f6
ldd [%o3 + 0x68], %f8
ldd [%o3 + 0x60], %f10
ldd [%o3 + 0x58], %f12
ldd [%o3 + 0x50], %f14
ldd [%o3 + 0x48], %f16
ldd [%o3 + 0x40], %f18
ldd [%o3 + 0x38], %f20
ldd [%o3 + 0x30], %f22
ldd [%o3 + 0x28], %f24
ldd [%o3 + 0x20], %f26
ldd [%o3 + 0x18], %f28
ldd [%o3 + 0x10], %f30
ldd [%o3 + 0x08], %f32
ldd [%o3 + 0x00], %f34
! Per-block loop.  The unaligned-output path re-enters at loop+4 because it
! has already loaded %g4 itself; the first instruction here must therefore
! stay a single ldx into %g4.
.Ldes_cbc_dec_loop:
ldx [%o0 + 0], %g4
brz,pt %g1, 4f
nop
! Misaligned input: splice the block out of two aligned doublewords.
ldx [%o0 + 8], %g5
sllx %g4, %g1, %g4
srlx %g5, %g2, %g5
or %g5, %g4, %g4
4:
.word 0x81b02304 !movxtod %g4,%f0
prefetch [%o0 + 8+63], 20
add %o0, 8, %o0
prefetch [%o1 + 63], 22
! Initial permutation, eight des_round opcodes each consuming two schedule
! entries, then the inverse permutation.
.word 0x81b02680 !des_ip %f0,%f0,,
.word 0x80c90126 !des_round %f4,%f6,%f0,%f0
.word 0x80ca012a !des_round %f8,%f10,%f0,%f0
.word 0x80cb012e !des_round %f12,%f14,%f0,%f0
.word 0x80cc0132 !des_round %f16,%f18,%f0,%f0
.word 0x80cd0136 !des_round %f20,%f22,%f0,%f0
.word 0x80ce013a !des_round %f24,%f26,%f0,%f0
.word 0x80cf013e !des_round %f28,%f30,%f0,%f0
.word 0x80c84123 !des_round %f32,%f34,%f0,%f0
.word 0x81b026a0 !des_iip %f0,%f0,,
.word 0x81b08d80 !fxor %f2,%f0,%f0 ! ^= ivec
! %g4 still holds the ciphertext block just decrypted; it becomes the
! chaining value for the next block.
.word 0x85b02304 !movxtod %g4,%f2
brnz,pn %g3, 2f
sub %o2, 1, %o2 ! delay slot: one block consumed on either path
std %f0, [%o1 + 0]
brnz,pt %o2, .Ldes_cbc_dec_loop
add %o1, 8, %o1
st %f2, [%o4 + 0] ! write out ivec
retl
st %f3, [%o4 + 4]
.align 16
! Unaligned output.  ASI 0x82 is the non-faulting primary ASI, so reading
! the next input doubleword ahead of the stores is safe even after the last
! block.  ASI 0xc0 selects a byte-masked partial store.
2: ldxa [%o0]0x82, %g4 ! avoid read-after-write hazard
! and ~4x deterioration
! in inp==out case
.word 0x81b00900 !faligndata %f0,%f0,%f0 ! handle unaligned output
! The first store fills the tail of the current doubleword, the second one
! the head of the next; the mask is complemented in between and restored
! after.
stda %f0, [%o1 + %g3]0xc0 ! partial store
add %o1, 8, %o1
orn %g0, %g3, %g3
stda %f0, [%o1 + %g3]0xc0 ! partial store
brnz,pt %o2, .Ldes_cbc_dec_loop+4
orn %g0, %g3, %g3
st %f2, [%o4 + 0] ! write out ivec
retl
st %f3, [%o4 + 4]
.type des_t4_cbc_decrypt,#function
.size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
! des_t4_ede3_cbc_encrypt -- triple-DES (EDE) CBC encryption with T4 DES
! opcodes.
!
! In:   %o0 = input address (any byte alignment)
!       %o1 = output address (any byte alignment)
!       %o2 = length in bytes; only whole 8-byte blocks are processed, a
!             trailing partial block is ignored, and a length below 8
!             returns without touching memory
!       %o3 = three consecutive 0x80-byte key schedules (0x180 bytes)
!       %o4 = 8-byte IV, read on entry and replaced by the last
!             ciphertext block on exit
! Uses: %g1 = input misalignment in bits, %g2 = minus that value (srlx
!             only looks at the low six bits, giving 64 - %g1),
!       %g3 = byte mask for partial stores, 0 when the output is aligned,
!       %g4/%g5 = input staging, %f0-%f62, GSR alignment field
!
! Schedule usage per block: pass 1 uses offsets 0x00..0x78 ascending and
! stays resident in %f4-%f34; pass 2 uses 0xf8 down to 0x80; pass 3 uses
! 0x100..0x178 ascending.  Passes 2 and 3 share %f36-%f62 and are reloaded
! for every block, interleaved with the rounds of the preceding pass.
!
! Leaf routine (retl, no save/restore).  VIS and T4 crypto opcodes are
! emitted as raw .word values with the mnemonic in the trailing comment.
! The early exit .Lcbc_abort is the label defined in des_t4_cbc_encrypt.
.globl des_t4_ede3_cbc_encrypt
.align 32
des_t4_ede3_cbc_encrypt:
! Convert the byte count to a block count BEFORE testing for zero.  Testing
! the raw byte count let lengths 1..7 through with a block count of 0,
! which the loop then decremented to -1 and never terminated on.
srln %o2, 0, %o2 ! needed on v8+, "nop" on v9
srlx %o2, 3, %o2 ! byte count -> whole 8-byte blocks
brz,pn %o2, .Lcbc_abort ! no complete block: nothing to do
nop
ld [%o4 + 0], %f0 ! load ivec
ld [%o4 + 4], %f1
! Input side: round %o0 down to 8 bytes and keep the misalignment as a bit
! shift count in %g1.
and %o0, 7, %g1
andn %o0, 7, %o0
sll %g1, 3, %g1
mov 0xff, %g3
prefetch [%o0], 20
prefetch [%o0 + 63], 20
sub %g0, %g1, %g2
! Output side: alignaddrl rounds %o1 down and programs the GSR alignment
! field; %g3 becomes 0xff >> (out & 7), or 0 when the output is aligned.
and %o1, 7, %g4
.word 0x93b24340 !alignaddrl %o1,%g0,%o1
srl %g3, %g4, %g3
movrz %g4, 0, %g3
prefetch [%o1], 22
ldd [%o3 + 0x00], %f4 ! load key schedule
ldd [%o3 + 0x08], %f6
ldd [%o3 + 0x10], %f8
ldd [%o3 + 0x18], %f10
ldd [%o3 + 0x20], %f12
ldd [%o3 + 0x28], %f14
ldd [%o3 + 0x30], %f16
ldd [%o3 + 0x38], %f18
ldd [%o3 + 0x40], %f20
ldd [%o3 + 0x48], %f22
ldd [%o3 + 0x50], %f24
ldd [%o3 + 0x58], %f26
ldd [%o3 + 0x60], %f28
ldd [%o3 + 0x68], %f30
ldd [%o3 + 0x70], %f32
ldd [%o3 + 0x78], %f34
! Per-block loop.  The unaligned-output path re-enters at loop+4 because it
! has already loaded %g4 itself; the first instruction here must therefore
! stay a single ldx into %g4.
.Ldes_ede3_cbc_enc_loop:
ldx [%o0 + 0], %g4
brz,pt %g1, 4f
nop
! Misaligned input: splice the block out of two aligned doublewords.
ldx [%o0 + 8], %g5
sllx %g4, %g1, %g4
srlx %g5, %g2, %g5
or %g5, %g4, %g4
4:
.word 0x85b02304 !movxtod %g4,%f2
prefetch [%o0 + 8+63], 20
add %o0, 8, %o0
.word 0x81b08d80 !fxor %f2,%f0,%f0 ! ^= ivec
prefetch [%o1 + 63], 22
! Pass 1: resident schedule in %f4-%f34.  The loads interleaved from here
! on fetch the pass 2 schedule in descending address order.
.word 0x81b02680 !des_ip %f0,%f0,,
.word 0x80c90126 !des_round %f4,%f6,%f0,%f0
.word 0x80ca012a !des_round %f8,%f10,%f0,%f0
.word 0x80cb012e !des_round %f12,%f14,%f0,%f0
.word 0x80cc0132 !des_round %f16,%f18,%f0,%f0
ldd [%o3 + 0x100-0x08], %f36
ldd [%o3 + 0x100-0x10], %f38
.word 0x80cd0136 !des_round %f20,%f22,%f0,%f0
ldd [%o3 + 0x100-0x18], %f40
ldd [%o3 + 0x100-0x20], %f42
.word 0x80ce013a !des_round %f24,%f26,%f0,%f0
ldd [%o3 + 0x100-0x28], %f44
ldd [%o3 + 0x100-0x30], %f46
.word 0x80cf013e !des_round %f28,%f30,%f0,%f0
ldd [%o3 + 0x100-0x38], %f48
ldd [%o3 + 0x100-0x40], %f50
.word 0x80c84123 !des_round %f32,%f34,%f0,%f0
ldd [%o3 + 0x100-0x48], %f52
ldd [%o3 + 0x100-0x50], %f54
.word 0x81b026a0 !des_iip %f0,%f0,,
ldd [%o3 + 0x100-0x58], %f56
ldd [%o3 + 0x100-0x60], %f58
! Pass 2.  %f36/%f38 are consumed by the first round and immediately
! reloaded with the last two entries of this pass; the other registers are
! refilled with the pass 3 schedule as soon as their round has issued.
.word 0x81b02680 !des_ip %f0,%f0,,
ldd [%o3 + 0x100-0x68], %f60
ldd [%o3 + 0x100-0x70], %f62
.word 0x80c94127 !des_round %f36,%f38,%f0,%f0
ldd [%o3 + 0x100-0x78], %f36
ldd [%o3 + 0x100-0x80], %f38
.word 0x80ca412b !des_round %f40,%f42,%f0,%f0
.word 0x80cb412f !des_round %f44,%f46,%f0,%f0
.word 0x80cc4133 !des_round %f48,%f50,%f0,%f0
ldd [%o3 + 0x100+0x00], %f40
ldd [%o3 + 0x100+0x08], %f42
.word 0x80cd4137 !des_round %f52,%f54,%f0,%f0
ldd [%o3 + 0x100+0x10], %f44
ldd [%o3 + 0x100+0x18], %f46
.word 0x80ce413b !des_round %f56,%f58,%f0,%f0
ldd [%o3 + 0x100+0x20], %f48
ldd [%o3 + 0x100+0x28], %f50
.word 0x80cf413f !des_round %f60,%f62,%f0,%f0
ldd [%o3 + 0x100+0x30], %f52
ldd [%o3 + 0x100+0x38], %f54
.word 0x80c94127 !des_round %f36,%f38,%f0,%f0
ldd [%o3 + 0x100+0x40], %f56
ldd [%o3 + 0x100+0x48], %f58
.word 0x81b026a0 !des_iip %f0,%f0,,
ldd [%o3 + 0x100+0x50], %f60
ldd [%o3 + 0x100+0x58], %f62
! Pass 3.  %f40/%f42 are used by the first round, then reloaded with the
! final two entries, which the last round of the pass consumes.
.word 0x81b02680 !des_ip %f0,%f0,,
ldd [%o3 + 0x100+0x60], %f36
ldd [%o3 + 0x100+0x68], %f38
.word 0x80ca412b !des_round %f40,%f42,%f0,%f0
ldd [%o3 + 0x100+0x70], %f40
ldd [%o3 + 0x100+0x78], %f42
.word 0x80cb412f !des_round %f44,%f46,%f0,%f0
.word 0x80cc4133 !des_round %f48,%f50,%f0,%f0
.word 0x80cd4137 !des_round %f52,%f54,%f0,%f0
.word 0x80ce413b !des_round %f56,%f58,%f0,%f0
.word 0x80cf413f !des_round %f60,%f62,%f0,%f0
.word 0x80c94127 !des_round %f36,%f38,%f0,%f0
.word 0x80ca412b !des_round %f40,%f42,%f0,%f0
.word 0x81b026a0 !des_iip %f0,%f0,,
! %f0 is both the ciphertext block and the chaining value for the next one.
brnz,pn %g3, 2f
sub %o2, 1, %o2 ! delay slot: one block consumed on either path
std %f0, [%o1 + 0]
brnz,pt %o2, .Ldes_ede3_cbc_enc_loop
add %o1, 8, %o1
st %f0, [%o4 + 0] ! write out ivec
retl
st %f1, [%o4 + 4]
.align 16
! Unaligned output.  ASI 0x82 is the non-faulting primary ASI, so reading
! the next input doubleword ahead of the stores is safe even after the last
! block.  ASI 0xc0 selects a byte-masked partial store.
2: ldxa [%o0]0x82, %g4 ! avoid read-after-write hazard
! and ~2x deterioration
! in inp==out case
.word 0x85b00900 !faligndata %f0,%f0,%f2 ! handle unaligned output
! The rotated copy goes to %f2 so that %f0 stays intact for chaining.  The
! first store fills the tail of the current doubleword, the second one the
! head of the next; the mask is complemented in between and restored after.
stda %f2, [%o1 + %g3]0xc0 ! partial store
add %o1, 8, %o1
orn %g0, %g3, %g3
stda %f2, [%o1 + %g3]0xc0 ! partial store
brnz,pt %o2, .Ldes_ede3_cbc_enc_loop+4
orn %g0, %g3, %g3
st %f0, [%o4 + 0] ! write out ivec
retl
st %f1, [%o4 + 4]
.type des_t4_ede3_cbc_encrypt,#function
.size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
! des_t4_ede3_cbc_decrypt -- triple-DES (EDE) CBC decryption with T4 DES
! opcodes.
!
! In:   %o0 = input address (any byte alignment)
!       %o1 = output address (any byte alignment)
!       %o2 = length in bytes; only whole 8-byte blocks are processed, a
!             trailing partial block is ignored, and a length below 8
!             returns without touching memory
!       %o3 = three consecutive 0x80-byte key schedules (0x180 bytes)
!       %o4 = 8-byte IV, read on entry and replaced by the last
!             ciphertext (input) block on exit
! Uses: %g1 = input misalignment in bits, %g2 = minus that value (srlx
!             only looks at the low six bits, giving 64 - %g1),
!       %g3 = byte mask for partial stores, 0 when the output is aligned,
!       %g4/%g5 = input staging, %f0-%f62, GSR alignment field
!
! Schedule usage per block: pass 1 uses offsets 0x178 down to 0x100 and
! stays resident in %f4-%f34; pass 2 uses 0x80..0xf8 ascending; pass 3
! uses 0x78 down to 0x00.  Passes 2 and 3 share %f36-%f62 and are reloaded
! for every block, interleaved with the rounds of the preceding pass.
!
! Leaf routine (retl, no save/restore).  VIS and T4 crypto opcodes are
! emitted as raw .word values with the mnemonic in the trailing comment.
! The early exit .Lcbc_abort is the label defined in des_t4_cbc_encrypt.
.globl des_t4_ede3_cbc_decrypt
.align 32
des_t4_ede3_cbc_decrypt:
! Convert the byte count to a block count BEFORE testing for zero.  Testing
! the raw byte count let lengths 1..7 through with a block count of 0,
! which the loop then decremented to -1 and never terminated on.
srln %o2, 0, %o2 ! needed on v8+, "nop" on v9
srlx %o2, 3, %o2 ! byte count -> whole 8-byte blocks
brz,pn %o2, .Lcbc_abort ! no complete block: nothing to do
nop
ld [%o4 + 0], %f2 ! load ivec
ld [%o4 + 4], %f3
! Input side: round %o0 down to 8 bytes and keep the misalignment as a bit
! shift count in %g1.
and %o0, 7, %g1
andn %o0, 7, %o0
sll %g1, 3, %g1
mov 0xff, %g3
prefetch [%o0], 20
prefetch [%o0 + 63], 20
sub %g0, %g1, %g2
! Output side: alignaddrl rounds %o1 down and programs the GSR alignment
! field; %g3 becomes 0xff >> (out & 7), or 0 when the output is aligned.
and %o1, 7, %g4
.word 0x93b24340 !alignaddrl %o1,%g0,%o1
srl %g3, %g4, %g3
movrz %g4, 0, %g3
prefetch [%o1], 22
ldd [%o3 + 0x100+0x78], %f4 ! load key schedule
ldd [%o3 + 0x100+0x70], %f6
ldd [%o3 + 0x100+0x68], %f8
ldd [%o3 + 0x100+0x60], %f10
ldd [%o3 + 0x100+0x58], %f12
ldd [%o3 + 0x100+0x50], %f14
ldd [%o3 + 0x100+0x48], %f16
ldd [%o3 + 0x100+0x40], %f18
ldd [%o3 + 0x100+0x38], %f20
ldd [%o3 + 0x100+0x30], %f22
ldd [%o3 + 0x100+0x28], %f24
ldd [%o3 + 0x100+0x20], %f26
ldd [%o3 + 0x100+0x18], %f28
ldd [%o3 + 0x100+0x10], %f30
ldd [%o3 + 0x100+0x08], %f32
ldd [%o3 + 0x100+0x00], %f34
! Per-block loop.  The unaligned-output path re-enters at loop+4 because it
! has already loaded %g4 itself; the first instruction here must therefore
! stay a single ldx into %g4.
.Ldes_ede3_cbc_dec_loop:
ldx [%o0 + 0], %g4
brz,pt %g1, 4f
nop
! Misaligned input: splice the block out of two aligned doublewords.
ldx [%o0 + 8], %g5
sllx %g4, %g1, %g4
srlx %g5, %g2, %g5
or %g5, %g4, %g4
4:
.word 0x81b02304 !movxtod %g4,%f0
prefetch [%o0 + 8+63], 20
add %o0, 8, %o0
prefetch [%o1 + 63], 22
! Pass 1: resident schedule in %f4-%f34.  The loads interleaved from here
! on fetch the pass 2 schedule in ascending address order.
.word 0x81b02680 !des_ip %f0,%f0,,
.word 0x80c90126 !des_round %f4,%f6,%f0,%f0
.word 0x80ca012a !des_round %f8,%f10,%f0,%f0
.word 0x80cb012e !des_round %f12,%f14,%f0,%f0
.word 0x80cc0132 !des_round %f16,%f18,%f0,%f0
ldd [%o3 + 0x80+0x00], %f36
ldd [%o3 + 0x80+0x08], %f38
.word 0x80cd0136 !des_round %f20,%f22,%f0,%f0
ldd [%o3 + 0x80+0x10], %f40
ldd [%o3 + 0x80+0x18], %f42
.word 0x80ce013a !des_round %f24,%f26,%f0,%f0
ldd [%o3 + 0x80+0x20], %f44
ldd [%o3 + 0x80+0x28], %f46
.word 0x80cf013e !des_round %f28,%f30,%f0,%f0
ldd [%o3 + 0x80+0x30], %f48
ldd [%o3 + 0x80+0x38], %f50
.word 0x80c84123 !des_round %f32,%f34,%f0,%f0
ldd [%o3 + 0x80+0x40], %f52
ldd [%o3 + 0x80+0x48], %f54
.word 0x81b026a0 !des_iip %f0,%f0,,
ldd [%o3 + 0x80+0x50], %f56
ldd [%o3 + 0x80+0x58], %f58
! Pass 2.  %f36/%f38 are consumed by the first round and immediately
! reloaded with the last two entries of this pass; the other registers are
! refilled with the pass 3 schedule as soon as their round has issued.
.word 0x81b02680 !des_ip %f0,%f0,,
ldd [%o3 + 0x80+0x60], %f60
ldd [%o3 + 0x80+0x68], %f62
.word 0x80c94127 !des_round %f36,%f38,%f0,%f0
ldd [%o3 + 0x80+0x70], %f36
ldd [%o3 + 0x80+0x78], %f38
.word 0x80ca412b !des_round %f40,%f42,%f0,%f0
.word 0x80cb412f !des_round %f44,%f46,%f0,%f0
.word 0x80cc4133 !des_round %f48,%f50,%f0,%f0
ldd [%o3 + 0x80-0x08], %f40
ldd [%o3 + 0x80-0x10], %f42
.word 0x80cd4137 !des_round %f52,%f54,%f0,%f0
ldd [%o3 + 0x80-0x18], %f44
ldd [%o3 + 0x80-0x20], %f46
.word 0x80ce413b !des_round %f56,%f58,%f0,%f0
ldd [%o3 + 0x80-0x28], %f48
ldd [%o3 + 0x80-0x30], %f50
.word 0x80cf413f !des_round %f60,%f62,%f0,%f0
ldd [%o3 + 0x80-0x38], %f52
ldd [%o3 + 0x80-0x40], %f54
.word 0x80c94127 !des_round %f36,%f38,%f0,%f0
ldd [%o3 + 0x80-0x48], %f56
ldd [%o3 + 0x80-0x50], %f58
.word 0x81b026a0 !des_iip %f0,%f0,,
ldd [%o3 + 0x80-0x58], %f60
ldd [%o3 + 0x80-0x60], %f62
! Pass 3.  %f40/%f42 are used by the first round, then reloaded with the
! final two entries, which the last round of the pass consumes.
.word 0x81b02680 !des_ip %f0,%f0,,
ldd [%o3 + 0x80-0x68], %f36
ldd [%o3 + 0x80-0x70], %f38
.word 0x80ca412b !des_round %f40,%f42,%f0,%f0
ldd [%o3 + 0x80-0x78], %f40
ldd [%o3 + 0x80-0x80], %f42
.word 0x80cb412f !des_round %f44,%f46,%f0,%f0
.word 0x80cc4133 !des_round %f48,%f50,%f0,%f0
.word 0x80cd4137 !des_round %f52,%f54,%f0,%f0
.word 0x80ce413b !des_round %f56,%f58,%f0,%f0
.word 0x80cf413f !des_round %f60,%f62,%f0,%f0
.word 0x80c94127 !des_round %f36,%f38,%f0,%f0
.word 0x80ca412b !des_round %f40,%f42,%f0,%f0
.word 0x81b026a0 !des_iip %f0,%f0,,
.word 0x81b08d80 !fxor %f2,%f0,%f0 ! ^= ivec
! %g4 still holds the ciphertext block just decrypted; it becomes the
! chaining value for the next block.
.word 0x85b02304 !movxtod %g4,%f2
brnz,pn %g3, 2f
sub %o2, 1, %o2 ! delay slot: one block consumed on either path
std %f0, [%o1 + 0]
brnz,pt %o2, .Ldes_ede3_cbc_dec_loop
add %o1, 8, %o1
st %f2, [%o4 + 0] ! write out ivec
retl
st %f3, [%o4 + 4]
.align 16
! Unaligned output.  ASI 0x82 is the non-faulting primary ASI, so reading
! the next input doubleword ahead of the stores is safe even after the last
! block.  ASI 0xc0 selects a byte-masked partial store.
2: ldxa [%o0]0x82, %g4 ! avoid read-after-write hazard
! and ~3x deterioration
! in inp==out case
.word 0x81b00900 !faligndata %f0,%f0,%f0 ! handle unaligned output
! The first store fills the tail of the current doubleword, the second one
! the head of the next; the mask is complemented in between and restored
! after.
stda %f0, [%o1 + %g3]0xc0 ! partial store
add %o1, 8, %o1
orn %g0, %g3, %g3
stda %f0, [%o1 + %g3]0xc0 ! partial store
brnz,pt %o2, .Ldes_ede3_cbc_dec_loop+4
orn %g0, %g3, %g3
st %f2, [%o4 + 0] ! write out ivec
retl
st %f3, [%o4 + 4]
.type des_t4_ede3_cbc_decrypt,#function
.size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
.asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"
.align 4