154 lines
3.9 KiB
ArmAsm
154 lines
3.9 KiB
ArmAsm
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
//
|
|
// Permission to use, copy, modify, and/or distribute this software for any
|
|
// purpose with or without fee is hereby granted, provided that the above
|
|
// copyright notice and this permission notice appear in all copies.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Subtract, z := x - y
|
|
// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
|
|
//
|
|
// extern uint64_t bignum_sub
|
|
// (uint64_t p, uint64_t *z,
|
|
// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
|
|
//
|
|
// Does the z := x - y operation, truncating modulo p words in general and
|
|
// returning a top borrow (0 or 1) in the p'th place, only subtracting input
|
|
// words below p (as well as m and n respectively) to get the diff and borrow.
|
|
//
|
|
// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
|
|
// Microsoft x64 ABI: RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
|
|
// ----------------------------------------------------------------------------
|
|
|
|
#include "s2n_bignum_internal.h"
|
|
|
|
// GNU assembler Intel syntax: destination operand first, registers written
// without a '%' prefix.
.intel_syntax noprefix
|
|
// Symbol directives for bignum_sub. Both macros come from
// s2n_bignum_internal.h, which is not visible here; presumably they control
// export/visibility of the symbol -- confirm against that header.
S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub)
|
|
S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub)
|
|
.text
|
|
|
|
// Register aliases. The six arguments are named after the C prototype and
// sit in the standard x86-64 ABI argument registers listed in the header
// comment (RDI, RSI, RDX, RCX, R8, R9 in that order).
// p = number of words in the output z
#define p rdi
|
|
// z = pointer to the output words
#define z rsi
|
|
// m = number of words in the input x
#define m rdx
|
|
// x = pointer to the first input (minuend)
#define x rcx
|
|
// n = number of words in the input y
#define n r8
|
|
// y = pointer to the second input (subtrahend)
#define y r9
|
|
// i = running word index used to address x, y and z (scaled by 8)
#define i r10
|
|
// a = scratch word for each limb; also holds the returned borrow (RAX)
#define a rax
|
|
|
|
// ashort = 32-bit view of a; writing it zero-extends into the full register
#define ashort eax
|
|
|
|
|
|
|
|
// Entry point for bignum_sub. Computes z := x - y truncated to p words and
// returns the final borrow (0 or 1) in a. Apart from the optional Windows
// shim, the body uses only the aliases p, z, m, x, n, y, i and a, and does
// not touch any stack slots. The borrow is carried in CF between limbs, so
// the loops deliberately use only instructions that leave CF intact
// (mov, inc, dec, jnz) between successive sbb instructions.
S2N_BN_SYMBOL(bignum_sub):
|
|
// Landing pad for indirect branches when CET/IBT is enabled.
endbr64
|
|
|
|
// Microsoft x64 shim: rdi and rsi are callee-saved in that ABI, so save them
// and then move the arguments into the registers the body expects. The two
// pushes lower rsp by 16, so the stack arguments documented at [RSP+40] and
// [RSP+48] on entry are read from [rsp+56] and [rsp+64].
#if WINDOWS_ABI
|
|
push rdi
|
|
push rsi
|
|
mov rdi, rcx
|
|
mov rsi, rdx
|
|
mov rdx, r8
|
|
mov rcx, r9
|
|
mov r8, [rsp+56]
|
|
mov r9, [rsp+64]
|
|
#endif
|
|
|
|
// Zero the main index counter for both branches
|
|
|
|
xor i, i
|
|
|
|
// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
|
|
// we'll never need words past the p'th. Can now assume m <= p and n <= p.
|
|
// Then compare the modified m and n and branch accordingly
|
|
|
|
// cmp sets CF on an unsigned "p < m", in which case cmovc replaces m by p.
cmp p, m
|
|
cmovc m, p
|
|
// Same clamp for n.
cmp p, n
|
|
cmovc n, p
|
|
// CF set here means m < n (unsigned), i.e. y has more words than x.
cmp m, n
|
|
jc ylonger
|
|
|
|
// The case where x is longer or of the same size (p >= m >= n)
|
|
|
|
// p := p - m, the number of top words of z that lie above x.
sub p, m
|
|
// m := (m - n) + 1. The extra one compensates for xtest decrementing before
// it tests, so xtoploop runs exactly m - n times.
sub m, n
|
|
inc m
|
|
// Checks n == 0 and, as a side effect, clears CF so the borrow chain
// starts from zero.
test n, n
|
|
jz xtest
|
|
// Words where both inputs exist: z[i] := x[i] - y[i] - borrow.
xmainloop:
|
|
mov a, [x+8*i]
|
|
sbb a, [y+8*i]
|
|
mov [z+8*i],a
|
|
// inc/dec do not modify CF, so the borrow survives the loop bookkeeping.
inc i
|
|
dec n
|
|
jnz xmainloop
|
|
jmp xtest
|
|
// Words of x above the length of y: z[i] := x[i] - 0 - borrow.
xtoploop:
|
|
mov a, [x+8*i]
|
|
sbb a, 0
|
|
mov [z+8*i],a
|
|
inc i
|
|
// Loop test for xtoploop; also the entry point when n == 0.
xtest:
|
|
dec m
|
|
jnz xtoploop
|
|
// a := 0 - CF, i.e. all ones if there is a borrow out, otherwise zero.
sbb a, a
|
|
// Skip the fill when no words of z remain above the longer input.
test p, p
|
|
jz tailskip
|
|
// Fill the remaining p words of z with the borrow mask in a. The ylonger
// path also jumps here and then returns through the code at tailskip.
tailloop:
|
|
mov [z+8*i],a
|
|
inc i
|
|
dec p
|
|
jnz tailloop
|
|
tailskip:
|
|
// Turn the mask (0 or all ones) into the returned borrow (0 or 1).
neg a
|
|
// Restore the Microsoft x64 callee-saved registers in reverse push order.
#if WINDOWS_ABI
|
|
pop rsi
|
|
pop rdi
|
|
#endif
|
|
ret
|
|
|
|
// The case where y is longer (p >= n > m)
|
|
|
|
ylonger:
|
|
|
|
// p := p - n, the number of top words of z that lie above y.
sub p, n
|
|
// n := n - m, the number of words of y above the length of x. This is at
// least 1 on this path, so ytoploop can test its counter at the bottom.
sub n, m
|
|
// Checks m == 0 and clears CF so the borrow chain starts from zero.
test m, m
|
|
jz ytoploop
|
|
// Words where both inputs exist: z[i] := x[i] - y[i] - borrow.
ymainloop:
|
|
mov a, [x+8*i]
|
|
sbb a, [y+8*i]
|
|
mov [z+8*i],a
|
|
inc i
|
|
dec m
|
|
jnz ymainloop
|
|
// Words of y above the length of x: z[i] := 0 - y[i] - borrow.
ytoploop:
|
|
// Zero a with mov rather than xor: xor would clear CF and lose the borrow.
// Writing the 32-bit view also zeroes the upper half of a.
mov ashort, 0
|
|
sbb a, [y+8*i]
|
|
mov [z+8*i],a
|
|
inc i
|
|
dec n
|
|
jnz ytoploop
|
|
// a := 0 - CF, the borrow mask, exactly as on the x-longer path.
sbb a, a
|
|
// If words of z remain, share the fill loop (and its return) above.
test p, p
|
|
jnz tailloop
|
|
// No tail to fill: convert the mask to 0 or 1 and return directly.
neg a
|
|
#if WINDOWS_ABI
|
|
pop rsi
|
|
pop rdi
|
|
#endif
|
|
ret
|
|
|
|
// On Linux ELF targets, emit an empty .note.GNU-stack section. With the GNU
// toolchain this marks the object as not needing an executable stack.
#if defined(__linux__) && defined(__ELF__)
|
|
.section .note.GNU-stack,"",%progbits
|
|
#endif
|