Replace the incomplete, GPL-licensed ARM arithmetic support routines with the ones from the EDK2 project. These also cover long long multiplication and long long logical shifts.
Also remove the special case for small dividends in DivU64x32: we can simply let the compiler handle it, emitting calls to the support routines where appropriate.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 inc/arm/efibind.h   |   9 +-
 lib/Makefile        |   3 +-
 lib/arm/div.S       | 155 +++++++++++
 lib/arm/div64.S     | 193 --------------
 lib/arm/edk2asm.h   |   6 +
 lib/arm/ldivmod.S   |  61 +++++
 lib/arm/lib1funcs.S | 279 --------------------
 lib/arm/llsl.S      |  41 +++
 lib/arm/llsr.S      |  41 +++
 lib/arm/mullu.S     |  33 +++
 lib/arm/uldiv.S     | 267 +++++++++++++++++++
 11 files changed, 608 insertions(+), 480 deletions(-)
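
Note (illustrative only, not part of the patch): with the special case and
the (UINTN) casts gone, DivU64x32 always performs a genuine 64-bit division,
and on 32-bit ARM EABI targets GCC lowers that to a call to __aeabi_uldivmod,
which lib/arm/uldiv.S below provides. A hypothetical caller, assuming a
non-constant divisor:

    /* Hypothetical example; BytesToSectors is not part of this patch. */
    UINT64 BytesToSectors(UINT64 Bytes, UINTN SectorSize, UINTN *Rem)
    {
        /* SectorSize is not a compile-time constant, so GCC emits a call
         * to __aeabi_uldivmod for the 64-bit divide and modulo. */
        return DivU64x32(Bytes, SectorSize, Rem);
    }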
diff --git a/inc/arm/efibind.h b/inc/arm/efibind.h
index 798212c006c6..cc4b5c598bf6 100644
--- a/inc/arm/efibind.h
+++ b/inc/arm/efibind.h
@@ -125,13 +125,8 @@ typedef uint32_t UINTN;
 #define uefi_call_wrapper(func, va_num, ...) func(__VA_ARGS__)
 #define EFI_FUNCTION
 
-extern UINT64 __DivU64x32(UINT64 Dividend, UINTN Divisor, UINTN *Remainder);
-
 static inline UINT64 DivU64x32(UINT64 Dividend, UINTN Divisor, UINTN *Remainder)
 {
-	if (Dividend >> 32)
-		return __DivU64x32(Dividend, Divisor, Remainder);
-
 	/*
 	 * GCC turns a division into a multiplication and shift with precalculated
 	 * constants if the divisor is constant and the dividend fits into a 32 bit
@@ -139,7 +134,7 @@ static inline UINT64 DivU64x32(UINT64 Dividend, UINTN Divisor, UINTN *Remainder)
 	 * library functions.
 	 */
 	if (Remainder)
-		*Remainder = (UINTN)Dividend % Divisor;
-	Dividend = (UINTN)Dividend / Divisor;
+		*Remainder = Dividend % Divisor;
+	Dividend = Dividend / Divisor;
 	return Dividend;
 }
diff --git a/lib/Makefile b/lib/Makefile
index 622730fbcd65..a9ba4e2caba5 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -58,7 +58,8 @@ FILES += $(ARCH)/callwrap $(ARCH)/efi_stub
 endif
 
 ifeq ($(ARCH),arm)
-FILES += $(ARCH)/lib1funcs $(ARCH)/div64
+FILES += $(ARCH)/uldiv $(ARCH)/ldivmod $(ARCH)/div $(ARCH)/llsl $(ARCH)/llsr \
+	 $(ARCH)/mullu
 endif
 
 OBJS = $(FILES:%=%.o)
diff --git a/lib/arm/div.S b/lib/arm/div.S
new file mode 100644
index 000000000000..71158b6f6214
--- /dev/null
+++ b/lib/arm/div.S
@@ -0,0 +1,155 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2011, ARM. All rights reserved.<BR>
+#
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution.  The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------

+#include "edk2asm.h"
+
+.text
+.align 2
+GCC_ASM_EXPORT(__aeabi_uidiv)
+GCC_ASM_EXPORT(__aeabi_uidivmod)
+GCC_ASM_EXPORT(__aeabi_idiv)
+GCC_ASM_EXPORT(__aeabi_idivmod)
+
+# AREA Math, CODE, READONLY
+
+#
+#UINT32
+#EFIAPI
+#__aeabi_uidivmod (
+#  IN UINT32 Dividend,
+#  IN UINT32 Divisor
+#  );
+#
+
+ASM_PFX(__aeabi_uidiv):
+ASM_PFX(__aeabi_uidivmod):
+  rsbs    r12, r1, r0, LSR #4
+  mov     r2, #0
+  bcc     ASM_PFX(__arm_div4)
+  rsbs    r12, r1, r0, LSR #8
+  bcc     ASM_PFX(__arm_div8)
+  mov     r3, #0
+  b       ASM_PFX(__arm_div_large)
+
+#
+#INT32
+#EFIAPI
+#__aeabi_idivmod (
+#  IN INT32 Dividend,
+#  IN INT32 Divisor
+#  );
+#
+ASM_PFX(__aeabi_idiv):
+ASM_PFX(__aeabi_idivmod):
+  orrs    r12, r0, r1
+  bmi     ASM_PFX(__arm_div_negative)
+  rsbs    r12, r1, r0, LSR #1
+  mov     r2, #0
+  bcc     ASM_PFX(__arm_div1)
+  rsbs    r12, r1, r0, LSR #4
+  bcc     ASM_PFX(__arm_div4)
+  rsbs    r12, r1, r0, LSR #8
+  bcc     ASM_PFX(__arm_div8)
+  mov     r3, #0
+  b       ASM_PFX(__arm_div_large)
+ASM_PFX(__arm_div8):
+  rsbs    r12, r1, r0, LSR #7
+  subcs   r0, r0, r1, LSL #7
+  adc     r2, r2, r2
+  rsbs    r12, r1, r0, LSR #6
+  subcs   r0, r0, r1, LSL #6
+  adc     r2, r2, r2
+  rsbs    r12, r1, r0, LSR #5
+  subcs   r0, r0, r1, LSL #5
+  adc     r2, r2, r2
+  rsbs    r12, r1, r0, LSR #4
+  subcs   r0, r0, r1, LSL #4
+  adc     r2, r2, r2
+ASM_PFX(__arm_div4):
+  rsbs    r12, r1, r0, LSR #3
+  subcs   r0, r0, r1, LSL #3
+  adc     r2, r2, r2
+  rsbs    r12, r1, r0, LSR #2
+  subcs   r0, r0, r1, LSL #2
+  adcs    r2, r2, r2
+  rsbs    r12, r1, r0, LSR #1
+  subcs   r0, r0, r1, LSL #1
+  adc     r2, r2, r2
+ASM_PFX(__arm_div1):
+  subs    r1, r0, r1
+  movcc   r1, r0
+  adc     r0, r2, r2
+  bx      r14
+ASM_PFX(__arm_div_negative):
+  ands    r2, r1, #0x80000000
+  rsbmi   r1, r1, #0
+  eors    r3, r2, r0, ASR #32
+  rsbcs   r0, r0, #0
+  rsbs    r12, r1, r0, LSR #4
+  bcc     label1
+  rsbs    r12, r1, r0, LSR #8
+  bcc     label2
+ASM_PFX(__arm_div_large):
+  lsl     r1, r1, #6
+  rsbs    r12, r1, r0, LSR #8
+  orr     r2, r2, #0xfc000000
+  bcc     label2
+  lsl     r1, r1, #6
+  rsbs    r12, r1, r0, LSR #8
+  orr     r2, r2, #0x3f00000
+  bcc     label2
+  lsl     r1, r1, #6
+  rsbs    r12, r1, r0, LSR #8
+  orr     r2, r2, #0xfc000
+  orrcs   r2, r2, #0x3f00
+  lslcs   r1, r1, #6
+  rsbs    r12, r1, #0
+  bcs     ASM_PFX(__aeabi_idiv0)
+label3:
+  lsrcs   r1, r1, #6
+label2:
+  rsbs    r12, r1, r0, LSR #7
+  subcs   r0, r0, r1, LSL #7
+  adc     r2, r2, r2
+  rsbs    r12, r1, r0, LSR #6
+  subcs   r0, r0, r1, LSL #6
+  adc     r2, r2, r2
+  rsbs    r12, r1, r0, LSR #5
+  subcs   r0, r0, r1, LSL #5
+  adc     r2, r2, r2
+  rsbs    r12, r1, r0, LSR #4
+  subcs   r0, r0, r1, LSL #4
+  adc     r2, r2, r2
+label1:
+  rsbs    r12, r1, r0, LSR #3
+  subcs   r0, r0, r1, LSL #3
+  adc     r2, r2, r2
+  rsbs    r12, r1, r0, LSR #2
+  subcs   r0, r0, r1, LSL #2
+  adcs    r2, r2, r2
+  bcs     label3
+  rsbs    r12, r1, r0, LSR #1
+  subcs   r0, r0, r1, LSL #1
+  adc     r2, r2, r2
+  subs    r1, r0, r1
+  movcc   r1, r0
+  adc     r0, r2, r2
+  asrs    r3, r3, #31
+  rsbmi   r0, r0, #0
+  rsbcs   r1, r1, #0
+  bx      r14
+
+  @ What to do about division by zero?  For now, just return.
+ASM_PFX(__aeabi_idiv0):
+  bx      r14
diff --git a/lib/arm/div64.S b/lib/arm/div64.S
deleted file mode 100644
index f67ba770c948..000000000000
--- a/lib/arm/div64.S
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- *  linux/arch/arm/lib/div64.S
- *
- *  Optimized computation of 64-bit dividend / 32-bit divisor
- *
- *  Author:	Nicolas Pitre
- *  Created:	Oct 5, 2003
- *  Copyright:	Monta Vista Software, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-
-#define xl r0
-#define xh r1
-#define yl r2
-#define yh r3
-
-#define UNWIND(x...)
-#define ARM(x...)	x
-#define THUMB(x...)
-
-#define ENTRY(__f)		\
-	.align 3		;\
-	.globl __f		;\
-	.type __f,%function	;\
-__f:
-#define ENDPROC(__f)		;\
-	.size __f, . - __f
-
-
-/*
-UINT64
-DivU64x32 (
-  IN UINT64   Dividend,
-  IN UINTN    Divisor,
-  OUT UINTN   *Remainder OPTIONAL
-  )
-// divide 64bit by 32bit and get a 64bit result
-// N.B. only works for 31bit divisors!!
-{
-}
-*/
-
-ENTRY(__DivU64x32)
-	stmfd	sp!, {r4-r6, lr}
-
-	mov	r5, r4			@ preserve Remainder
-	mov	r4, r2			@ divisor in r4
-	bl	__do_div64
-
-	teq	r5, #0
-	strne	xh, [r5]
-	mov	r0, yl
-	mov	r1, yh
-	ldmfd	sp!, {r4-r6, pc}
-ENDPROC(__DivU64x32)
-
-/*
- * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
- *
- * Note: Calling convention is totally non standard for optimal code.
- *       This is meant to be used by do_div() from include/asm/div64.h only.
- *
- * Input parameters:
- *	xh-xl	= dividend (clobbered)
- *	r4	= divisor (preserved)
- *
- * Output values:
- *	yh-yl	= result
- *	xh	= remainder
- *
- * Clobbered regs: xl, ip
- */
-
-ENTRY(__do_div64)
-UNWIND(.fnstart)
-
-	@ Test for easy paths first.
-	subs	ip, r4, #1
-	bls	9f			@ divisor is 0 or 1
-	tst	ip, r4
-	beq	8f			@ divisor is power of 2
-
-	@ See if we need to handle upper 32-bit result.
-	cmp	xh, r4
-	mov	yh, #0
-	blo	3f
-
-	@ Align divisor with upper part of dividend.
-	@ The aligned divisor is stored in yl preserving the original.
-	@ The bit position is stored in ip.
-
-	clz	yl, r4
-	clz	ip, xh
-	sub	yl, yl, ip
-	mov	ip, #1
-	mov	ip, ip, lsl yl
-	mov	yl, r4, lsl yl
-
-	@ The division loop for needed upper bit positions.
-	@ Break out early if dividend reaches 0.
-2:	cmp	xh, yl
-	orrcs	yh, yh, ip
-	subcss	xh, xh, yl
-	movnes	ip, ip, lsr #1
-	mov	yl, yl, lsr #1
-	bne	2b
-
-	@ See if we need to handle lower 32-bit result.
-3:	cmp	xh, #0
-	mov	yl, #0
-	cmpeq	xl, r4
-	movlo	xh, xl
-	movlo	pc, lr
-
-	@ The division loop for lower bit positions.
-	@ Here we shift remainer bits leftwards rather than moving the
-	@ divisor for comparisons, considering the carry-out bit as well.
-	mov	ip, #0x80000000
-4:	movs	xl, xl, lsl #1
-	adcs	xh, xh, xh
-	beq	6f
-	cmpcc	xh, r4
-5:	orrcs	yl, yl, ip
-	subcs	xh, xh, r4
-	movs	ip, ip, lsr #1
-	bne	4b
-	mov	pc, lr
-
-	@ The top part of remainder became zero.  If carry is set
-	@ (the 33th bit) this is a false positive so resume the loop.
-	@ Otherwise, if lower part is also null then we are done.
-6:	bcs	5b
-	cmp	xl, #0
-	moveq	pc, lr
-
-	@ We still have remainer bits in the low part.  Bring them up.
-
-	clz	xh, xl			@ we know xh is zero here so...
-	add	xh, xh, #1
-	mov	xl, xl, lsl xh
-	mov	ip, ip, lsr xh
-
-	@ Current remainder is now 1.  It is worthless to compare with
-	@ divisor at this point since divisor can not be smaller than 3 here.
-	@ If possible, branch for another shift in the division loop.
-	@ If no bit position left then we are done.
-	movs	ip, ip, lsr #1
-	mov	xh, #1
-	bne	4b
-	mov	pc, lr
-
-8:	@ Division by a power of 2: determine what that divisor order is
-	@ then simply shift values around
-
-	clz	ip, r4
-	rsb	ip, ip, #31
-
-	mov	yh, xh, lsr ip
-	mov	yl, xl, lsr ip
-	rsb	ip, ip, #32
- ARM(	orr	yl, yl, xh, lsl ip	)
- THUMB(	lsl	xh, xh, ip		)
- THUMB(	orr	yl, yl, xh		)
-	mov	xh, xl, lsl ip
-	mov	xh, xh, lsr ip
-	mov	pc, lr
-
-	@ eq -> division by 1: obvious enough...
-9:	moveq	yl, xl
-	moveq	yh, xh
-	moveq	xh, #0
-	moveq	pc, lr
-UNWIND(.fnend)
-
-UNWIND(.fnstart)
-UNWIND(.pad #4)
-UNWIND(.save {lr})
-Ldiv0_64:
-	@ Division by 0:
-	str	lr, [sp, #-8]!
-	bl	__div0
-
-	@ as wrong as it could be...
-	mov	yl, #0
-	mov	yh, #0
-	mov	xh, #0
-	ldr	pc, [sp], #8
-
-UNWIND(.fnend)
-ENDPROC(__do_div64)
diff --git a/lib/arm/edk2asm.h b/lib/arm/edk2asm.h
new file mode 100644
index 000000000000..9515eaf77f34
--- /dev/null
+++ b/lib/arm/edk2asm.h
@@ -0,0 +1,6 @@
+
+#define ASM_PFX(x) x
+#define GCC_ASM_EXPORT(x) \
+	.globl x ; \
+	.type x, %function
+
diff --git a/lib/arm/ldivmod.S b/lib/arm/ldivmod.S
new file mode 100644
index 000000000000..edbf89ed58b5
--- /dev/null
+++ b/lib/arm/ldivmod.S
@@ -0,0 +1,61 @@
+//------------------------------------------------------------------------------
+//
+// Copyright (c) 2008 - 2009, Apple Inc. All rights reserved.<BR>
+//
+// This program and the accompanying materials
+// are licensed and made available under the terms and conditions of the BSD License
+// which accompanies this distribution.  The full text of the license may be found at
+// http://opensource.org/licenses/bsd-license.php
+//
+// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+//
+//------------------------------------------------------------------------------
+
+
+#include "edk2asm.h"
+
+  .text
+  .align 2
+  GCC_ASM_EXPORT(__aeabi_ldivmod)
+
+//
+// A pair of (unsigned) long longs is returned in {{r0, r1}, {r2, r3}},
+// the quotient in {r0, r1}, and the remainder in {r2, r3}.
+//
+//__value_in_regs lldiv_t
+//EFIAPI
+//__aeabi_ldivmod (
+//  IN INT64 Dividend,
+//  IN INT64 Divisor
+//  );
+//
+
+ASM_PFX(__aeabi_ldivmod):
+  push    {r4, lr}
+  asrs    r4, r1, #1
+  eor     r4, r4, r3, LSR #1
+  bpl     L_Test1
+  rsbs    r0, r0, #0
+  rsc     r1, r1, #0
+L_Test1:
+  tst     r3, r3
+  bpl     L_Test2
+  rsbs    r2, r2, #0
+  rsc     r3, r3, #0
+L_Test2:
+  bl      ASM_PFX(__aeabi_uldivmod)
+  tst     r4, #0x40000000
+  beq     L_Test3
+  rsbs    r0, r0, #0
+  rsc     r1, r1, #0
+L_Test3:
+  tst     r4, #0x80000000
+  beq     L_Exit
+  rsbs    r2, r2, #0
+  rsc     r3, r3, #0
+L_Exit:
+  pop     {r4, pc}
+
diff --git a/lib/arm/lib1funcs.S b/lib/arm/lib1funcs.S
deleted file mode 100644
index 6b4d4bf55fb0..000000000000
--- a/lib/arm/lib1funcs.S
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
- *
- * Author: Nicolas Pitre <nico@fluxnic.net>
- *   - contributed to gcc-3.4 on Sep 30, 2003
- *   - adapted for the Linux kernel on Oct 2, 2003
- */
-
-/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
-later version.
-
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file into combinations with other programs,
-and to distribute those combinations without any restriction coming
-from the use of this file.  (The General Public License restrictions
-do apply in other respects; for example, they cover modification of
-the file, and distribution when not linked into a combine
-executable.)
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define UNWIND(x...)
-
-#define ENTRY(__f)		\
-	.align 3		;\
-	.globl __f		;\
-	.type __f,%function	;\
-__f:
-#define ENDPROC(__f)		;\
-	.size __f, . - __f
-
-.macro ARM_DIV_BODY dividend, divisor, result, curbit
-
-	clz	\curbit, \divisor
-	clz	\result, \dividend
-	sub	\result, \curbit, \result
-	mov	\curbit, #1
-	mov	\divisor, \divisor, lsl \result
-	mov	\curbit, \curbit, lsl \result
-	mov	\result, #0
-
-	@ Division loop
-1:	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-	orrhs	\result,   \result,   \curbit
-	cmp	\dividend, \divisor,  lsr #1
-	subhs	\dividend, \dividend, \divisor, lsr #1
-	orrhs	\result,   \result,   \curbit,  lsr #1
-	cmp	\dividend, \divisor,  lsr #2
-	subhs	\dividend, \dividend, \divisor, lsr #2
-	orrhs	\result,   \result,   \curbit,  lsr #2
-	cmp	\dividend, \divisor,  lsr #3
-	subhs	\dividend, \dividend, \divisor, lsr #3
-	orrhs	\result,   \result,   \curbit,  lsr #3
-	cmp	\dividend, #0			@ Early termination?
-	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
-	movne	\divisor,  \divisor, lsr #4
-	bne	1b
-
-.endm
-
-
-.macro ARM_DIV2_ORDER divisor, order
-
-	clz	\order, \divisor
-	rsb	\order, \order, #31
-
-.endm
-
-
-.macro ARM_MOD_BODY dividend, divisor, order, spare
-
-	clz	\order, \divisor
-	clz	\spare, \dividend
-	sub	\order, \order, \spare
-	mov	\divisor, \divisor, lsl \order
-
-	@ Perform all needed substractions to keep only the reminder.
-	@ Do comparisons in batch of 4 first.
-	subs	\order, \order, #3		@ yes, 3 is intended here
-	blt	2f
-
-1:	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-	cmp	\dividend, \divisor,  lsr #1
-	subhs	\dividend, \dividend, \divisor, lsr #1
-	cmp	\dividend, \divisor,  lsr #2
-	subhs	\dividend, \dividend, \divisor, lsr #2
-	cmp	\dividend, \divisor,  lsr #3
-	subhs	\dividend, \dividend, \divisor, lsr #3
-	cmp	\dividend, #1
-	mov	\divisor, \divisor, lsr #4
-	subges	\order, \order, #4
-	bge	1b
-
-	tst	\order, #3
-	teqne	\dividend, #0
-	beq	5f
-
-	@ Either 1, 2 or 3 comparison/substractions are left.
-2:	cmn	\order, #2
-	blt	4f
-	beq	3f
-	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-	mov	\divisor,  \divisor,  lsr #1
-3:	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-	mov	\divisor,  \divisor,  lsr #1
-4:	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-5:
-.endm
-
-
-ENTRY(__aeabi_uidiv)
-ENTRY(__udivsi3)
-
-	subs	r2, r1, #1
-	moveq	pc, lr
-	bcc	Ldiv0
-	cmp	r0, r1
-	bls	11f
-	tst	r1, r2
-	beq	12f
-
-	ARM_DIV_BODY r0, r1, r2, r3
-
-	mov	r0, r2
-	mov	pc, lr
-
-11:	moveq	r0, #1
-	movne	r0, #0
-	mov	pc, lr
-
-12:	ARM_DIV2_ORDER r1, r2
-
-	mov	r0, r0, lsr r2
-	mov	pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__udivsi3)
-ENDPROC(__aeabi_uidiv)
-
-ENTRY(__umodsi3)
-UNWIND(.fnstart)
-
-	subs	r2, r1, #1		@ compare divisor with 1
-	bcc	Ldiv0
-	cmpne	r0, r1			@ compare dividend with divisor
-	moveq	r0, #0
-	tsthi	r1, r2			@ see if divisor is power of 2
-	andeq	r0, r0, r2
-	movls	pc, lr
-
-	ARM_MOD_BODY r0, r1, r2, r3
-
-	mov	pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__umodsi3)
-
-ENTRY(__divsi3)
-ENTRY(__aeabi_idiv)
-UNWIND(.fnstart)
-
-	cmp	r1, #0
-	eor	ip, r0, r1		@ save the sign of the result.
-	beq	Ldiv0
-	rsbmi	r1, r1, #0		@ loops below use unsigned.
-	subs	r2, r1, #1		@ division by 1 or -1 ?
-	beq	10f
-	movs	r3, r0
-	rsbmi	r3, r0, #0		@ positive dividend value
-	cmp	r3, r1
-	bls	11f
-	tst	r1, r2			@ divisor is power of 2 ?
-	beq	12f
-
-	ARM_DIV_BODY r3, r1, r0, r2
-
-	cmp	ip, #0
-	rsbmi	r0, r0, #0
-	mov	pc, lr
-
-10:	teq	ip, r0			@ same sign ?
-	rsbmi	r0, r0, #0
-	mov	pc, lr
-
-11:	movlo	r0, #0
-	moveq	r0, ip, asr #31
-	orreq	r0, r0, #1
-	mov	pc, lr
-
-12:	ARM_DIV2_ORDER r1, r2
-
-	cmp	ip, #0
-	mov	r0, r3, lsr r2
-	rsbmi	r0, r0, #0
-	mov	pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__divsi3)
-ENDPROC(__aeabi_idiv)
-
-ENTRY(__modsi3)
-UNWIND(.fnstart)
-
-	cmp	r1, #0
-	beq	Ldiv0
-	rsbmi	r1, r1, #0		@ loops below use unsigned.
-	movs	ip, r0			@ preserve sign of dividend
-	rsbmi	r0, r0, #0		@ if negative make positive
-	subs	r2, r1, #1		@ compare divisor with 1
-	cmpne	r0, r1			@ compare dividend with divisor
-	moveq	r0, #0
-	tsthi	r1, r2			@ see if divisor is power of 2
-	andeq	r0, r0, r2
-	bls	10f
-
-	ARM_MOD_BODY r0, r1, r2, r3
-
-10:	cmp	ip, #0
-	rsbmi	r0, r0, #0
-	mov	pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__modsi3)
-
-ENTRY(__aeabi_uidivmod)
-UNWIND(.fnstart)
-UNWIND(.save {r0, r1, ip, lr}	)
-
-	stmfd	sp!, {r0, r1, ip, lr}
-	bl	__aeabi_uidiv
-	ldmfd	sp!, {r1, r2, ip, lr}
-	mul	r3, r0, r2
-	sub	r1, r1, r3
-	mov	pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__aeabi_uidivmod)
-
-ENTRY(__aeabi_idivmod)
-UNWIND(.fnstart)
-UNWIND(.save {r0, r1, ip, lr}	)
-	stmfd	sp!, {r0, r1, ip, lr}
-	bl	__aeabi_idiv
-	ldmfd	sp!, {r1, r2, ip, lr}
-	mul	r3, r0, r2
-	sub	r1, r1, r3
-	mov	pc, lr
-
-UNWIND(.fnend)
-ENDPROC(__aeabi_idivmod)
-
-Ldiv0:
-UNWIND(.fnstart)
-UNWIND(.pad #4)
-UNWIND(.save {lr})
-	str	lr, [sp, #-8]!
-	bl	__div0
-	mov	r0, #0			@ About as wrong as it could be.
-	ldr	pc, [sp], #8
-UNWIND(.fnend)
-ENDPROC(Ldiv0)
diff --git a/lib/arm/llsl.S b/lib/arm/llsl.S
new file mode 100644
index 000000000000..0f5c4078b975
--- /dev/null
+++ b/lib/arm/llsl.S
@@ -0,0 +1,41 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2013, ARM. All rights reserved.<BR>
+#
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution.  The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------
+
+#include "edk2asm.h"
+
+.text
+.align 2
+GCC_ASM_EXPORT(__aeabi_llsl)
+
+#
+#INT64
+#EFIAPI
+#__aeabi_llsl (
+#  IN INT64  Value,
+#  IN INT32  Shift
+#  );
+#
+ASM_PFX(__aeabi_llsl):
+  subs    r3, r2, #0x20
+  bpl     1f
+  rsb     r3, r2, #0x20
+  lsl     r1, r1, r2
+  orr     r1, r1, r0, lsr r3
+  lsl     r0, r0, r2
+  bx      lr
+1:
+  lsl     r1, r0, r3
+  mov     r0, #0
+  bx      lr
diff --git a/lib/arm/llsr.S b/lib/arm/llsr.S
new file mode 100644
index 000000000000..432b27d7ac7f
--- /dev/null
+++ b/lib/arm/llsr.S
@@ -0,0 +1,41 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2013, ARM. All rights reserved.<BR>
+#
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution.  The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------
+
+#include "edk2asm.h"
+
+.text
+.align 2
+GCC_ASM_EXPORT(__aeabi_llsr)
+
+#
+#UINT64
+#EFIAPI
+#__aeabi_llsr (
+#  IN UINT64  Value,
+#  IN UINT32  Shift
+#  );
+#
+ASM_PFX(__aeabi_llsr):
+  subs    r3, r2, #0x20
+  bpl     1f
+  rsb     r3, r2, #0x20
+  lsr     r0, r0, r2
+  orr     r0, r0, r1, lsl r3
+  lsr     r1, r1, r2
+  bx      lr
+1:
+  lsr     r0, r1, r3
+  mov     r1, #0
+  bx      lr
diff --git a/lib/arm/mullu.S b/lib/arm/mullu.S
new file mode 100644
index 000000000000..39b9a80bd268
--- /dev/null
+++ b/lib/arm/mullu.S
@@ -0,0 +1,33 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2008 - 2009, Apple Inc. All rights reserved.<BR>
+#
+# This program and the accompanying materials
+# are licensed and made available under the terms and conditions of the BSD License
+# which accompanies this distribution.  The full text of the license may be found at
+# http://opensource.org/licenses/bsd-license.php
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+#
+#------------------------------------------------------------------------------
+
+#include "edk2asm.h"
+
+.text
+GCC_ASM_EXPORT(__aeabi_lmul)
+#
+#INT64
+#EFIAPI
+#__aeabi_lmul (
+#  IN INT64  Multiplicand,
+#  IN INT64  Multiplier
+#  );
+#
+ASM_PFX(__aeabi_lmul):
+  stmdb   sp!, {lr}
+  mov     lr, r0
+  umull   r0, ip, r2, lr
+  mla     r1, r2, r1, ip
+  mla     r1, r3, lr, r1
+  ldmia   sp!, {pc}
diff --git a/lib/arm/uldiv.S b/lib/arm/uldiv.S
new file mode 100644
index 000000000000..f478898d23c6
--- /dev/null
+++ b/lib/arm/uldiv.S
@@ -0,0 +1,267 @@
+//------------------------------------------------------------------------------
+//
+// Copyright (c) 2008 - 2009, Apple Inc. All rights reserved.<BR>
+//
+// This program and the accompanying materials
+// are licensed and made available under the terms and conditions of the BSD License
+// which accompanies this distribution.  The full text of the license may be found at
+// http://opensource.org/licenses/bsd-license.php
+//
+// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+//
+//------------------------------------------------------------------------------
+
+#include "edk2asm.h"
+
+  .text
+  .align 2
+  GCC_ASM_EXPORT(__aeabi_uldivmod)
+
+//
+//UINT64
+//EFIAPI
+//__aeabi_uldivmod (
+//  IN UINT64 Dividend,
+//  IN UINT64 Divisor
+//  );
+//
+ASM_PFX(__aeabi_uldivmod):
+  stmdb   sp!, {r4, r5, r6, lr}
+  mov     r4, r1
+  mov     r5, r0
+  mov     r6, #0                // 0x0
+  orrs    ip, r3, r2, lsr #31
+  bne     ASM_PFX(__aeabi_uldivmod_label1)
+  tst     r2, r2
+  beq     ASM_PFX(_ll_div0)
+  movs    ip, r2, lsr #15
+  addeq   r6, r6, #16           // 0x10
+  mov     ip, r2, lsl r6
+  movs    lr, ip, lsr #23
+  moveq   ip, ip, lsl #8
+  addeq   r6, r6, #8            // 0x8
+  movs    lr, ip, lsr #27
+  moveq   ip, ip, lsl #4
+  addeq   r6, r6, #4            // 0x4
+  movs    lr, ip, lsr #29
+  moveq   ip, ip, lsl #2
+  addeq   r6, r6, #2            // 0x2
+  movs    lr, ip, lsr #30
+  moveq   ip, ip, lsl #1
+  addeq   r6, r6, #1            // 0x1
+  b       ASM_PFX(_ll_udiv_small)
+ASM_PFX(__aeabi_uldivmod_label1):
+  tst     r3, #-2147483648      // 0x80000000
+  bne     ASM_PFX(__aeabi_uldivmod_label2)
+  movs    ip, r3, lsr #15
+  addeq   r6, r6, #16           // 0x10
+  mov     ip, r3, lsl r6
+  movs    lr, ip, lsr #23
+  moveq   ip, ip, lsl #8
+  addeq   r6, r6, #8            // 0x8
+  movs    lr, ip, lsr #27
+  moveq   ip, ip, lsl #4
+  addeq   r6, r6, #4            // 0x4
+  movs    lr, ip, lsr #29
+  moveq   ip, ip, lsl #2
+  addeq   r6, r6, #2            // 0x2
+  movs    lr, ip, lsr #30
+  addeq   r6, r6, #1            // 0x1
+  rsb     r3, r6, #32           // 0x20
+  moveq   ip, ip, lsl #1
+  orr     ip, ip, r2, lsr r3
+  mov     lr, r2, lsl r6
+  b       ASM_PFX(_ll_udiv_big)
+ASM_PFX(__aeabi_uldivmod_label2):
+  mov     ip, r3
+  mov     lr, r2
+  b       ASM_PFX(_ll_udiv_ginormous)
+
+ASM_PFX(_ll_udiv_small):
+  cmp     r4, ip, lsl #1
+  mov     r3, #0                // 0x0
+  subcs   r4, r4, ip, lsl #1
+  addcs   r3, r3, #2            // 0x2
+  cmp     r4, ip
+  subcs   r4, r4, ip
+  adcs    r3, r3, #0            // 0x0
+  add     r2, r6, #32           // 0x20
+  cmp     r2, #32               // 0x20
+  rsb     ip, ip, #0            // 0x0
+  bcc     ASM_PFX(_ll_udiv_small_label1)
+  orrs    r0, r4, r5, lsr #30
+  moveq   r4, r5
+  moveq   r5, #0                // 0x0
+  subeq   r2, r2, #32           // 0x20
+ASM_PFX(_ll_udiv_small_label1):
+  mov     r1, #0                // 0x0
+  cmp     r2, #16               // 0x10
+  bcc     ASM_PFX(_ll_udiv_small_label2)
+  movs    r0, r4, lsr #14
+  moveq   r4, r4, lsl #16
+  addeq   r1, r1, #16           // 0x10
+ASM_PFX(_ll_udiv_small_label2):
+  sub     lr, r2, r1
+  cmp     lr, #8                // 0x8
+  bcc     ASM_PFX(_ll_udiv_small_label3)
+  movs    r0, r4, lsr #22
+  moveq   r4, r4, lsl #8
+  addeq   r1, r1, #8            // 0x8
+ASM_PFX(_ll_udiv_small_label3):
+  rsb     r0, r1, #32           // 0x20
+  sub     r2, r2, r1
+  orr     r4, r4, r5, lsr r0
+  mov     r5, r5, lsl r1
+  cmp     r2, #1                // 0x1
+  bcc     ASM_PFX(_ll_udiv_small_label5)
+  sub     r2, r2, #1            // 0x1
+  and     r0, r2, #7            // 0x7
+  eor     r0, r0, #7            // 0x7
+  adds    r0, r0, r0, lsl #1
+  add     pc, pc, r0, lsl #2
+  nop                           // (mov r0,r0)
+ASM_PFX(_ll_udiv_small_label4):
+  adcs    r5, r5, r5
+  adcs    r4, ip, r4, lsl #1
+  rsbcc   r4, ip, r4
+  adcs    r5, r5, r5
+  adcs    r4, ip, r4, lsl #1
+  rsbcc   r4, ip, r4
+  adcs    r5, r5, r5
+  adcs    r4, ip, r4, lsl #1
+  rsbcc   r4, ip, r4
+  adcs    r5, r5, r5
+  adcs    r4, ip, r4, lsl #1
+  rsbcc   r4, ip, r4
+  adcs    r5, r5, r5
+  adcs    r4, ip, r4, lsl #1
+  rsbcc   r4, ip, r4
+  adcs    r5, r5, r5
+  adcs    r4, ip, r4, lsl #1
+  rsbcc   r4, ip, r4
+  adcs    r5, r5, r5
+  adcs    r4, ip, r4, lsl #1
+  rsbcc   r4, ip, r4
+  adcs    r5, r5, r5
+  adcs    r4, ip, r4, lsl #1
+  sub     r2, r2, #8            // 0x8
+  tst     r2, r2
+  rsbcc   r4, ip, r4
+  bpl     ASM_PFX(_ll_udiv_small_label4)
+ASM_PFX(_ll_udiv_small_label5):
+  mov     r2, r4, lsr r6
+  bic     r4, r4, r2, lsl r6
+  adcs    r0, r5, r5
+  adc     r1, r4, r4
+  add     r1, r1, r3, lsl r6
+  mov     r3, #0                // 0x0
+  ldmia   sp!, {r4, r5, r6, pc}
+
+ASM_PFX(_ll_udiv_big):
+  subs    r0, r5, lr
+  mov     r3, #0                // 0x0
+  sbcs    r1, r4, ip
+  movcs   r5, r0
+  movcs   r4, r1
+  adcs    r3, r3, #0            // 0x0
+  subs    r0, r5, lr
+  sbcs    r1, r4, ip
+  movcs   r5, r0
+  movcs   r4, r1
+  adcs    r3, r3, #0            // 0x0
+  subs    r0, r5, lr
+  sbcs    r1, r4, ip
+  movcs   r5, r0
+  movcs   r4, r1
+  adcs    r3, r3, #0            // 0x0
+  mov     r1, #0                // 0x0
+  rsbs    lr, lr, #0            // 0x0
+  rsc     ip, ip, #0            // 0x0
+  cmp     r6, #16               // 0x10
+  bcc     ASM_PFX(_ll_udiv_big_label1)
+  movs    r0, r4, lsr #14
+  moveq   r4, r4, lsl #16
+  addeq   r1, r1, #16           // 0x10
+ASM_PFX(_ll_udiv_big_label1):
+  sub     r2, r6, r1
+  cmp     r2, #8                // 0x8
+  bcc     ASM_PFX(_ll_udiv_big_label2)
+  movs    r0, r4, lsr #22
+  moveq   r4, r4, lsl #8
+  addeq   r1, r1, #8            // 0x8
+ASM_PFX(_ll_udiv_big_label2):
+  rsb     r0, r1, #32           // 0x20
+  sub     r2, r6, r1
+  orr     r4, r4, r5, lsr r0
+  mov     r5, r5, lsl r1
+  cmp     r2, #1                // 0x1
+  bcc     ASM_PFX(_ll_udiv_big_label4)
+  sub     r2, r2, #1            // 0x1
+  and     r0, r2, #3            // 0x3
+  rsb     r0, r0, #3            // 0x3
+  adds    r0, r0, r0, lsl #1
+  add     pc, pc, r0, lsl #3
+  nop                           // (mov r0,r0)
+ASM_PFX(_ll_udiv_big_label3):
+  adcs    r5, r5, r5
+  adcs    r4, r4, r4
+  adcs    r0, lr, r5
+  adcs    r1, ip, r4
+  movcs   r5, r0
+  movcs   r4, r1
+  adcs    r5, r5, r5
+  adcs    r4, r4, r4
+  adcs    r0, lr, r5
+  adcs    r1, ip, r4
+  movcs   r5, r0
+  movcs   r4, r1
+  adcs    r5, r5, r5
+  adcs    r4, r4, r4
+  adcs    r0, lr, r5
+  adcs    r1, ip, r4
+  movcs   r5, r0
+  movcs   r4, r1
+  sub     r2, r2, #4            // 0x4
+  adcs    r5, r5, r5
+  adcs    r4, r4, r4
+  adcs    r0, lr, r5
+  adcs    r1, ip, r4
+  tst     r2, r2
+  movcs   r5, r0
+  movcs   r4, r1
+  bpl     ASM_PFX(_ll_udiv_big_label3)
+ASM_PFX(_ll_udiv_big_label4):
+  mov     r1, #0                // 0x0
+  mov     r2, r5, lsr r6
+  bic     r5, r5, r2, lsl r6
+  adcs    r0, r5, r5
+  adc     r1, r1, #0            // 0x0
+  movs    lr, r3, lsl r6
+  mov     r3, r4, lsr r6
+  bic     r4, r4, r3, lsl r6
+  adc     r1, r1, #0            // 0x0
+  adds    r0, r0, lr
+  orr     r2, r2, r4, ror r6
+  adc     r1, r1, #0            // 0x0
+  ldmia   sp!, {r4, r5, r6, pc}
+
+ASM_PFX(_ll_udiv_ginormous):
+  subs    r2, r5, lr
+  mov     r1, #0                // 0x0
+  sbcs    r3, r4, ip
+  adc     r0, r1, r1
+  movcc   r2, r5
+  movcc   r3, r4
+  ldmia   sp!, {r4, r5, r6, pc}
+
+ASM_PFX(_ll_div0):
+  ldmia   sp!, {r4, r5, r6, lr}
+  mov     r0, #0                // 0x0
+  mov     r1, #0                // 0x0
+  b       ASM_PFX(__aeabi_ldiv0)
+
+ASM_PFX(__aeabi_ldiv0):
+  bx      r14
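
For reference (illustrative only, not part of the patch): the imported shift
and multiply helpers implement the standard AEABI routines that GCC may emit
for 64-bit operations, roughly equivalent to the following C:

    /* C equivalents of the new helpers, for illustration only. */
    UINT64 Llsl(UINT64 Value, UINT32 Shift) { return Value << Shift; } /* __aeabi_llsl */
    UINT64 Llsr(UINT64 Value, UINT32 Shift) { return Value >> Shift; } /* __aeabi_llsr */
    INT64  Lmul(INT64 A, INT64 B)           { return A * B;  }         /* __aeabi_lmul */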