rockbox/firmware/bitswap.S
Jörg Hohensohn 1f30fa7db0 patch #801964 by Magnus Holmgren: 10% faster bitswap
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@3970 a1c6a512-1295-4272-9138-f99709370657
2003-10-12 15:45:03 +00:00

130 lines
4.8 KiB
ArmAsm

/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2002 by Magnus Holmgren
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/*
 * _bitswap - replace every byte of a buffer, in place, by the flip-table
 * entry that the byte indexes (the table holds each value with its bit
 * order reversed).
 *
 * In:    r4 = address of the data (odd or even)
 *        r5 = length in bytes, compared as a signed value; nothing is
 *             read or written when it is less than 1
 * Out:   no return value; the buffer is modified in place
 * Clobb: r0-r3, r6, r7 and the T bit; r4 and r5 are advanced/consumed
 *
 * NOTE(review): r4/r5 look like the first and second C arguments, i.e.
 * presumably bitswap(data, length) - confirm against the C prototype.
 */
        .section .icode,"ax",@progbits
        .align  4
        .global _bitswap
        .type   _bitswap,@function

/* Registers used:
 *
 * r0   Temporary (required by some instructions)
 * r1   Low byte
 * r2   High byte
 * r3   Result after flip
 * r4   Data
 * r5   Length
 * r6   1
 * r7   Flip table
 */
_bitswap:
        mov.l   .fliptable,r7   /* r7 = address of the flip table */
        mov     #1,r6           /* constant 1 for the length compares */
        cmp/ge  r6,r5           /* at least one byte to process? */
        bf      .exit           /* no: leave the buffer untouched */
        mov     r4,r0           /* tst #imm operates on r0 */
        tst     #1,r0           /* odd address? */
        bt      .init           /* no, address is even */

        mov.b   @r4,r0          /* swap first byte, making r4 even */
        extu.b  r0,r0           /* table index must be 0..255 */
        mov.b   @(r0,r7),r0
        mov.b   r0,@r4
        add     #1,r4
        bra     .init           /* delayed branch: the next instruction */
        add     #-1,r5          /* runs in its delay slot (byte consumed) */

        /* The instruction order below is a bit strange, because:
         * 1) Keeping load/stores on longword boundaries means the instruction
         *    fetch won't compete with the memory access (because instructions
         *    are fetched in pairs).
         * 2) Using the result of a fetch in the next instruction causes a
         *    stall (except in certain circumstances).
         * See the SH-1 programming manual for details.
         *
         * Keep the number of instructions ahead of .loop even, so that the
         * placement described in 1) is not disturbed.
         */
.loop:
        mov.w   @r4,r1          /* data to flip */
        add     #-2,r5
        swap.b  r1,r2           /* get high byte */
        extu.b  r2,r0           /* prepare high byte */
        mov.b   @(r0,r7),r2     /* swap high byte */
        extu.b  r1,r0           /* prepare low byte */
        mov.b   @(r0,r7),r1     /* swap low byte */
        extu.b  r2,r2           /* zero extend high byte */
        swap.b  r2,r3           /* put high byte in result */
        extu.b  r1,r0           /* zero extend low byte */
        or      r0,r3           /* put low byte in result */
        mov.w   r3,@r4          /* store result */
        add     #2,r4
.init:
        cmp/gt  r6,r5           /* while [bytes remaining] > 1 */
        bt      .loop           /* (at least 2 bytes left) */

        cmp/eq  r6,r5
        bf      .exit           /* if not 1 byte left, exit */
        mov.b   @r4,r0          /* swap last byte */
        extu.b  r0,r0
        mov.b   @(r0,r7),r0
        mov.b   r0,@r4
.exit:
        rts
        nop                     /* delay slot of rts */
/* Literal pool and lookup table used by _bitswap.
 *
 * .fliptable holds the address of _fliptable; _bitswap fetches it with a
 * PC-relative mov.l, which is why the literal is aligned first.
 *
 * _fliptable has 256 one-byte entries (32 rows of 8). Entry i is the value
 * of i with its bit order reversed, e.g. 0x01 -> 0x80, 0x02 -> 0x40,
 * 0x80 -> 0x01. The trailing comment on each row gives the index range
 * that the row covers.
 */
.align 4
.fliptable:
.long _fliptable
_fliptable:
.byte 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0 /* 0x00-0x07 */
.byte 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0 /* 0x08-0x0f */
.byte 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8 /* 0x10-0x17 */
.byte 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8 /* 0x18-0x1f */
.byte 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4 /* 0x20-0x27 */
.byte 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4 /* 0x28-0x2f */
.byte 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec /* 0x30-0x37 */
.byte 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc /* 0x38-0x3f */
.byte 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2 /* 0x40-0x47 */
.byte 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2 /* 0x48-0x4f */
.byte 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea /* 0x50-0x57 */
.byte 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa /* 0x58-0x5f */
.byte 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6 /* 0x60-0x67 */
.byte 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6 /* 0x68-0x6f */
.byte 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee /* 0x70-0x77 */
.byte 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe /* 0x78-0x7f */
.byte 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1 /* 0x80-0x87 */
.byte 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1 /* 0x88-0x8f */
.byte 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9 /* 0x90-0x97 */
.byte 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9 /* 0x98-0x9f */
.byte 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5 /* 0xa0-0xa7 */
.byte 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5 /* 0xa8-0xaf */
.byte 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed /* 0xb0-0xb7 */
.byte 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd /* 0xb8-0xbf */
.byte 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3 /* 0xc0-0xc7 */
.byte 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3 /* 0xc8-0xcf */
.byte 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb /* 0xd0-0xd7 */
.byte 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb /* 0xd8-0xdf */
.byte 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7 /* 0xe0-0xe7 */
.byte 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7 /* 0xe8-0xef */
.byte 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef /* 0xf0-0xf7 */
.byte 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff /* 0xf8-0xff */
/* .end marks the first byte after the table; the symbol size recorded for
 * _bitswap therefore covers the code, the literal and the table. */
.end:
.size _bitswap,.end-_bitswap