2008-11-05 00:10:05 +00:00
|
|
|
/***************************************************************************
|
|
|
|
* __________ __ ___.
|
|
|
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
|
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
|
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
|
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
|
|
* \/ \/ \/ \/ \/
|
|
|
|
* $Id$
|
|
|
|
*
|
|
|
|
* Copyright (C) 2008 by Jens Arnold
|
2009-12-31 08:32:15 +00:00
|
|
|
* Copyright (C) 2009 by Andrew Mahone
|
2008-11-05 00:10:05 +00:00
|
|
|
*
|
|
|
|
* Optimised unsigned integer division for ARMv4
|
|
|
|
*
|
|
|
|
* Based on: libgcc routines for ARM cpu.
|
|
|
|
* Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
|
|
|
|
* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
|
|
|
|
* Free Software Foundation, Inc.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version 2
|
|
|
|
* of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
|
|
* KIND, either express or implied.
|
|
|
|
*
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
#include "config.h"
|
|
|
|
/* Codecs should not normally do this, but we need to check a macro, and
|
|
|
|
* codecs.h would confuse the assembler. */
|
|
|
|
|
2009-12-31 08:32:15 +00:00
|
|
|
/* ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient
 *
 * Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
 * for dividing a 30-bit value by a 15-bit value. Quotient and remainder are
 * kept together in one register, and the quotient bit produced by the
 * previous step is added in during the next trial subtraction, so each step
 * costs only two instructions. Modified to work with any dividend and
 * divisor both less than 1 << 30, and to skip trial steps by first working
 * out how many quotient bits are needed.
 * NOTE(review): udiv32_arm in this file uses this macro whenever bit 31 of
 * the dividend is clear, which is a wider range than the "1 << 30" stated
 * above -- confirm which bound is the intended one.
 *
 * Operands (all registers):
 *   dividend  in:  numerator
 *             out: upper part of the packed word (result >> bits); written
 *                  before quotient, so quotient may be the same register
 *   divisor   in:  denominator; clobbered (shifted left, then negated)
 *   result    scratch, holds the packed remainder/quotient word
 *   bits      scratch, starts at 1 and grows by every shift applied to
 *             the divisor
 *   curbit    scratch, number of unrolled steps to skip
 *   quotient  out: low "bits" bits of the packed word
 * The condition flags are clobbered.
 *
 * The computed jump relies on every unrolled step being two 32-bit ARM
 * instructions (8 bytes) and on pc reading as the address two instructions
 * past the "add pc" (hence the nop padding).
 */
.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient
    mov     \bits, #1
    /* Binary search for the alignment: whenever the divisor still fits
       under the dividend shifted right by "step", move it up by "step". */
    .irp    step, 16, 8, 4, 2, 1
    cmp     \divisor, \dividend, lsr #\step
    movls   \divisor, \divisor, lsl #\step
    addls   \bits, \bits, #\step
    .endr
    /* Work with the negated divisor so that a trial subtraction is an add
       whose carry-out is the new quotient bit. */
    rsb     \divisor, \divisor, #0
    adds    \result, \dividend, \divisor
    subcc   \result, \result, \divisor      @ no carry: undo the subtraction
    /* Skip (31 - bits) of the 30 unrolled steps below. */
    rsb     \curbit, \bits, #31
    add     pc, pc, \curbit, lsl #3
    nop
    .rept   30
    /* shift left, bring in the previous quotient bit, trial subtract */
    adcs    \result, \divisor, \result, lsl #1
    subcc   \result, \result, \divisor
    .endr
    /* shift remainder/quotient left one, add final quotient bit */
    adc     \result, \result, \result
    /* Unpack: bits above "bits" go to dividend, the rest is the quotient. */
    mov     \dividend, \result, lsr \bits
    eor     \quotient, \result, \dividend, lsl \bits
.endm
|
|
|
|
|
|
|
|
/* ARM_DIV_32_BODY dividend, divisor, result, curbit
 *
 * Unrolled shift-and-subtract division, one quotient bit per step. Each
 * step is three instructions; a computed jump skips the leading steps.
 *
 * Operands (all registers):
 *   dividend  in: numerator; clobbered (reduced by the steps that run; the
 *             last step does not subtract, so it is not a valid remainder)
 *   divisor   in: denominator; preserved
 *   result    out: quotient (used as a scratch copy of the dividend first)
 *   curbit    scratch, number of instructions to skip in the unrolled part
 * The condition flags are clobbered. The assembler symbol "shift" is
 * redefined by every expansion.
 *
 * The computed jump relies on 32-bit ARM instructions (skip count * 4) and
 * on pc reading as the address two instructions past the "add pc" (hence
 * the nop padding).
 */
.macro ARM_DIV_32_BODY dividend, divisor, result, curbit
    mov     \result, \dividend
    mov     \curbit, #(3 * 30)      @ start by skipping 30 steps of 3 instr.
    /* For each "step": if the divisor fits under the value shifted right by
       "step", that many more unrolled steps are needed. */
    .irp    step, 16, 8, 4
    cmp     \divisor, \result, lsr #\step
    movls   \result, \result, lsr #\step
    subls   \curbit, \curbit, #(3 * \step)
    .endr
    cmp     \divisor, \result, lsr #2
    subls   \curbit, \curbit, #(3 * 2)
    @ Calculation is only done down to shift=2, because the shift=1 step
    @ would need 3 more cycles, but would only gain 1.5 cycles on average.
    mov     \result, #0
    add     pc, pc, \curbit, lsl #2
    nop
    .set    shift, 32
    .rept   31
    .set    shift, shift - 1
    cmp     \divisor, \dividend, lsr #shift
    orrls   \result, \result, #(1 << shift)
    subls   \dividend, \dividend, \divisor, lsl #shift
    .endr   @ shift==0 in the .rept would cause a warning for lsr #0
    /* Final step (shift 0), written out by hand without the lsr. */
    cmp     \divisor, \dividend
    orrls   \result, \result, #1
    @subls  \dividend, \dividend, \divisor  @ correct remainder not needed
.endm
|
|
|
|
|
|
|
|
#ifdef USE_IRAM
    .section .icode,"ax",%progbits
#else
    .text
#endif
    .align
    .global udiv32_arm
    .type   udiv32_arm,%function

/* udiv32_arm: unsigned 32-bit division.
 *
 * In:       r0 = dividend, r1 = divisor
 * Out:      r0 = quotient; 0 when the divisor is 0
 * Clobbers: r1, r2, r3, ip and the condition flags
 * Returns with "bx lr"; no stack is used.
 */
udiv32_arm:
    cmp     r1, #0
    beq     20f                     @ division by zero
    tst     r0, r0
    /* The high bit of the dividend must be clear to use ARM_DIV_31_BODY;
       otherwise use ARM_DIV_32_BODY. Dividend and divisor are not compared
       beforehand, so the divisor may still have its high bit set on the
       31-bit path; the divisor is then larger than the dividend, no trial
       subtraction succeeds and the quotient comes out as 0.
     */
    bmi     10f
    ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0
    bx      lr

10:
    ARM_DIV_32_BODY r0, r1, r2, r3
    mov     r0, r2                  @ quotient was built in r2
    bx      lr

20:
    /* Only reached through the beq above, i.e. always with Z set. The
       former "movne" could therefore never execute and the dividend was
       returned unchanged; return a defined 0 instead. */
    mov     r0, #0
    bx      lr
    .size   udiv32_arm, .-udiv32_arm
|