* Add assembly optimised variants for memcpy, memset and find_first_set_bit
 * Add option to map_address in MMU to set caching algorithm


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19920 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Maurus Cuelenaere 2009-02-04 17:33:19 +00:00
parent 01bd736e00
commit 9b13a5d151
5 changed files with 446 additions and 11 deletions

View file

@ -400,15 +400,13 @@ target/arm/crt0.S
#elif defined(CPU_MIPS)
#undef mips
/*target/mips/memcpy.S
target/mips/memset.S
common/memset16.c
target/mips/strlen.S*/
common/memcpy.c
/*target/mips/strlen.S*/
common/memmove.c
common/memset.c
common/memset16.c
common/strlen.c
target/mips/ffs-mips.S
target/mips/memcpy-mips.S
target/mips/memset-mips.S
target/mips/mmu-mips.c
#if CONFIG_CPU==JZ4732
target/mips/ingenic_jz47xx/crt0.S

View file

@ -0,0 +1,54 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2009 by Maurus Cuelenaere
* based on ffs-arm.S by Michael Sevakis
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "mips.h"
/****************************************************************************
* int find_first_set_bit(uint32_t val);
*
* Find the index of the least significant set bit in the 32-bit word.
*
* return values:
* 0 - bit 0 is set
* 1 - bit 1 is set
* ...
* 31 - bit 31 is set
* 32 - no bits set
****************************************************************************/
/*
 * int find_first_set_bit(uint32_t val)
 *
 * In:    a0 = val
 * Out:   v0 = index (0..31) of the least significant set bit of val,
 *             or 32 when val == 0
 * Clobb: t0
 *
 * A plain clz on val would locate the MOST significant set bit, so the
 * lowest set bit is isolated first with (val & -val); the result then has
 * exactly one bit set and 31 - clz() is that bit's index.
 *
 * Assembled with .set noreorder: the instruction following each branch or
 * jump is its delay slot and is always executed.
 */
.align 2
.global find_first_set_bit
.type find_first_set_bit, %function
.set noreorder
find_first_set_bit:
    beqz    a0, no_bits_set     # val == 0: there is no bit to find
    subu    t0, zero, a0        # (delay slot) t0 = -val, unused if branch taken
    and     t0, a0, t0          # t0 = val & -val: only the lowest set bit survives
    clz     v0, t0              # v0 = leading zero count = 31 - bit index
    li      t0, 31              # t0 = 31
    jr      ra                  # Return
    subu    v0, t0, v0          # (delay slot) v0 = 31 - clz = bit index
no_bits_set:
    jr      ra                  # Return
    li      v0, 32              # (delay slot) v0 = 32: no bits set
.set reorder

View file

@ -0,0 +1,143 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
* This file was originally part of the GNU C Library
* Contributed to glibc by Hartvig Ekner <hartvige@mips.com>, 2002
* Adapted for Rockbox by Maurus Cuelenaere, 2009
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "mips.h"
/* void *memcpy(void *s1, const void *s2, size_t n); */
/*
 * Endian-neutral names for the MIPS partial-word load/store instructions.
 * "HI" is the instruction that is issued on the lowest address of an
 * unaligned word, "LO" the one issued on its highest address.
 */
#ifndef ROCKBOX_BIG_ENDIAN
/* little-endian: the high part is the right one, the low part the left one */
# define LWHI lwr
# define LWLO lwl
# define SWHI swr
# define SWLO swl
#else
/* big-endian: the high part is the left one, the low part the right one */
# define LWHI lwl
# define LWLO lwr
# define SWHI swl
# define SWLO swr
#endif
/*
 * void *memcpy(void *s1, const void *s2, size_t n)
 *
 * In:    a0 = s1 (destination), a1 = s2 (source), a2 = n (byte count)
 * Out:   v0 = s1
 * Clobb: a0-a3, t0-t7
 *
 * The code is emitted into the .icode section and assembled with
 * .set noreorder, so the instruction that follows every branch/jump is its
 * delay slot and executes whether or not the branch is taken.
 *
 * Strategy:
 *   - n < 8:                          plain byte loop (last8)
 *   - src and dst equally aligned:    align both with one partial-word
 *                                     load/store, then 32-byte blocks
 *                                     (lop8w), single words (lop1w), and
 *                                     finally the byte loop
 *   - src and dst differently aligned (shift): align the destination only,
 *                                     then unaligned word loads paired with
 *                                     aligned word stores, then the byte loop
 */
.section .icode, "ax", %progbits
.global memcpy
.type memcpy, %function
.set noreorder
memcpy:
slti t0, a2, 8 # Less than 8?
bne t0, zero, last8
move v0, a0 # Setup exit value before too late
xor t0, a1, a0 # Find a0/a1 displacement
andi t0, 0x3
bne t0, zero, shift # Go handle the unaligned case
/* (delay slot) t1 = -src; masked below to the byte count up to the next word boundary */
subu t1, zero, a1
andi t1, 0x3 # a0/a1 are aligned, but are we
beq t1, zero, chk8w # starting in the middle of a word?
/* (delay slot) n -= t1; a no-op when the branch is taken because t1 == 0 */
subu a2, t1
LWHI t0, 0(a1) # Yes we are... take care of that
addu a1, t1
SWHI t0, 0(a0)
addu a0, t1
/* From here on src and dst are both word aligned. */
chk8w:
andi t0, a2, 0x1f # 32 or more bytes left?
beq t0, a2, chk1w
/* (delay slot) a3 = n rounded down to a multiple of 32 */
subu a3, a2, t0 # Yes
addu a3, a1 # a3 = end address of loop
move a2, t0 # a2 = what will be left after loop
lop8w:
lw t0, 0(a1) # Loop taking 8 words at a time
lw t1, 4(a1)
lw t2, 8(a1)
lw t3, 12(a1)
lw t4, 16(a1)
lw t5, 20(a1)
lw t6, 24(a1)
lw t7, 28(a1)
/* Pointers are advanced before the stores, hence the negative store offsets. */
addiu a0, 32
addiu a1, 32
sw t0, -32(a0)
sw t1, -28(a0)
sw t2, -24(a0)
sw t3, -20(a0)
sw t4, -16(a0)
sw t5, -12(a0)
sw t6, -8(a0)
bne a1, a3, lop8w
/* (delay slot) last store of the block */
sw t7, -4(a0)
chk1w:
andi t0, a2, 0x3 # 4 or more bytes left?
beq t0, a2, last8
/* (delay slot) a3 = remaining count rounded down to a multiple of 4 */
subu a3, a2, t0 # Yes, handle them one word at a time
addu a3, a1 # a3 again end address
move a2, t0
lop1w:
lw t0, 0(a1)
addiu a0, 4
addiu a1, 4
bne a1, a3, lop1w
/* (delay slot) store the word just loaded */
sw t0, -4(a0)
/* Byte-wise tail: copies the a2 bytes that remain (skipped when a2 <= 0). */
last8:
blez a2, lst8e # Handle last 8 bytes, one at a time
/* (delay slot) a3 = source end address for the byte loop */
addu a3, a2, a1
lst8l:
lb t0, 0(a1)
addiu a0, 1
addiu a1, 1
bne a1, a3, lst8l
/* (delay slot) store the byte just loaded */
sb t0, -1(a0)
lst8e:
jr ra # Bye, bye
nop
/*
 * src and dst have different alignment within a word. Only reached with
 * n >= 8, so after consuming at most 3 head bytes at least one full word
 * remains and the shfth loop always has work to do.
 */
shift:
subu a3, zero, a0 # Src and Dest unaligned
andi a3, 0x3 # (unoptimized case...)
beq a3, zero, shft1
/* (delay slot) a no-op when the branch is taken because a3 == 0 */
subu a2, a3 # a2 = bytes left
/* LWHI + LWLO together assemble one unaligned source word in t0. */
LWHI t0, 0(a1) # Take care of first odd part
LWLO t0, 3(a1)
addu a1, a3
/* Partial store: writes only the bytes up to the next dst word boundary. */
SWHI t0, 0(a0)
addu a0, a3
/* dst is now word aligned; t0 = trailing byte count, a3 = src end of the word loop. */
shft1:
andi t0, a2, 0x3
subu a3, a2, t0
addu a3, a1
shfth:
LWHI t1, 0(a1) # Limp through, word by word
LWLO t1, 3(a1)
addiu a0, 4
addiu a1, 4
bne a1, a3, shfth
/* (delay slot) aligned store of the word just assembled */
sw t1, -4(a0)
b last8 # Handle anything which may be left
/* (delay slot) a2 = trailing byte count for the byte loop */
move a2, t0
.set reorder

View file

@ -0,0 +1,239 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* This file was originally part of the Linux/MIPS GNU C Library
* Copyright (C) 1998 by Ralf Baechle
* Adapted for Rockbox by Maurus Cuelenaere, 2009
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "mips.h"
/*
 * Straight-line fill sequences.
 *
 * FILLn(dst, offset, val) expands to n/4 consecutive word stores
 *     sw val, (offset + k)(dst)        for k = 0, 4, ..., n - 4
 * i.e. it writes n bytes of `val` starting at dst + offset.
 *
 * FILL32 is spelled out; every larger variant is two copies of the next
 * smaller one, the second copy shifted by half the block size.
 *
 * FILL is the block size in bytes used by memset and F_FILL the matching
 * fill macro; the two must be changed together.
 */
#define FILL32(dst, offset, val) \
    sw val, (offset + 0x00)(dst); \
    sw val, (offset + 0x04)(dst); \
    sw val, (offset + 0x08)(dst); \
    sw val, (offset + 0x0c)(dst); \
    sw val, (offset + 0x10)(dst); \
    sw val, (offset + 0x14)(dst); \
    sw val, (offset + 0x18)(dst); \
    sw val, (offset + 0x1c)(dst);
#define FILL64(dst, offset, val) \
    FILL32(dst, offset, val) \
    FILL32(dst, offset + 0x20, val)
#define FILL128(dst, offset, val) \
    FILL64(dst, offset, val) \
    FILL64(dst, offset + 0x40, val)
#define FILL256(dst, offset, val) \
    FILL128(dst, offset, val) \
    FILL128(dst, offset + 0x80, val)
#define FILL 32
#define F_FILL FILL32
/*
 * Endian-neutral names for the partial-word stores.
 *   SWHI addr: writes from the addressed byte up to the END of its word
 *              (used to fill an unaligned head).
 *   SWLO addr: writes from the START of the word up to and including the
 *              addressed byte (used to fill a sub-word tail).
 */
#ifdef ROCKBOX_BIG_ENDIAN
# define SWHI swl /* high part is left in big-endian */
# define SWLO swr /* low part is right in big-endian */
#else
# define SWHI swr /* high part is right in little-endian */
# define SWLO swl /* low part is left in little-endian */
#endif
/*
 * void *memset(void *s, int c, size_t n)
 *
 * a0: start of area to fill
 * a1: char to fill with
 * a2: size of area to fill
 *
 * Returns s in v0. Clobbers a0, a1, a2, t0 and t1.
 *
 * Outline:
 *   1. replicate the fill byte into all four bytes of a1
 *   2. n < 4: byte loop (small_memset)
 *   3. fill an unaligned head with one partial-word store
 *   4. fill whole FILL-byte blocks in a loop
 *   5. fill the remaining whole words by jumping into the middle of an
 *      unrolled F_FILL sequence (memset_partial)
 *   6. fill the last 1..3 bytes with one partial-word store
 *
 * Assembled with .set noreorder except for the block loop: the instruction
 * after each branch/jump is its delay slot and is always executed.
 */
    .section .icode, "ax", %progbits
    .global memset
    .type memset, %function
    .set noreorder
    .align 5
memset:
    beqz    a1, 1f              /* c == 0: fill word is already correct */
    move    v0, a0              /* (delay slot) result */
    andi    a1, 0xff            /* spread fillword */
    sll     t1, a1, 8
    or      a1, t1
    sll     t1, a1, 16
    or      a1, t1
1:
    sltiu   t0, a2, 4           /* very small region? */
    bnez    t0, small_memset
    andi    t0, a0, 3           /* (delay slot) aligned? */
    beqz    t0, 1f
    subu    t0, 4               /* (delay slot) t0 = -(bytes up to the word boundary) */
    SWHI    a1, (a0)            /* make word aligned */
    subu    a0, t0              /* word align ptr */
    addu    a2, t0              /* correct size */
1:  ori     t1, a2, (FILL-1)    /* # of full blocks */
    xori    t1, (FILL-1)        /* t1 = size rounded down to a multiple of FILL */
    beqz    t1, memset_partial  /* no block to fill */
    andi    t0, a2, (FILL-4)    /* (delay slot) t0 = bytes of whole words left after the blocks */
    addu    t1, a0              /* end address */
    .set reorder                /* let the assembler handle the bne delay slot */
1:  addiu   a0, FILL
    F_FILL( a0, -FILL, a1 )
    bne     t1, a0, 1b
    .set noreorder
memset_partial:
    /*
     * Computed jump into the unrolled stores below. Every sw in F_FILL is
     * one 4-byte instruction that stores 4 bytes, so starting t0 bytes of
     * code before label 2 executes exactly t0/4 stores, which (with a0
     * already advanced by t0) cover the t0 bytes just below the new a0.
     */
    la      t1, 2f              /* where to start */
    subu    t1, t0
    jr      t1
    addu    a0, t0              /* (delay slot) dest ptr */
    F_FILL( a0, -FILL, a1 )     /* ... but first do words ... */
2:  andi    a2, 3               /* 0 <= n <= 3 to go */
    beqz    a2, 1f
    addu    a0, a2              /* (delay slot) What's left */
    /*
     * a0 - 1 is the last byte of the area and a0 - a2 is word aligned, so
     * the tail must be written from the word start up to that byte: SWLO.
     * SWHI here would write from that byte to the end of the word, i.e.
     * beyond the end of the area.
     */
    SWLO    a1, -1(a0)
1:  jr      ra
    move    a2, zero            /* (delay slot) */
small_memset:
    beqz    a2, 2f              /* n == 0: nothing to do */
    addu    t1, a0, a2          /* (delay slot) t1 = end address */
1:  addiu   a0, 1               /* fill bytewise */
    bne     t1, a0, 1b
    sb      a1, -1(a0)          /* (delay slot) store one byte */
2:  jr      ra                  /* done */
    move    a2, zero            /* (delay slot) */
    .set reorder

View file

@ -99,14 +99,15 @@ static void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
restore_irq(old_irq);
}
void map_address(unsigned long virtual, unsigned long physical, unsigned long length)
void map_address(unsigned long virtual, unsigned long physical,
unsigned long length, unsigned int cache_flags)
{
unsigned long entry0 = (physical & PFN_MASK) << PFN_SHIFT;
unsigned long entry1 = ((physical+length) & PFN_MASK) << PFN_SHIFT;
unsigned long entryhi = virtual & ~VPN2_SHIFT;
entry0 |= (M_EntryLoG | M_EntryLoV | (K_CacheAttrC << S_EntryLoC) );
entry1 |= (M_EntryLoG | M_EntryLoV | (K_CacheAttrC << S_EntryLoC) );
entry0 |= (M_EntryLoG | M_EntryLoV | (cache_flags << S_EntryLoC) );
entry1 |= (M_EntryLoG | M_EntryLoV | (cache_flags << S_EntryLoC) );
add_wired_entry(entry0, entry1, entryhi, DEFAULT_PAGE_MASK);
}
@ -119,7 +120,7 @@ void tlb_init(void)
local_flush_tlb_all();
/*
map_address(0x80000000, 0x80000000, 0x4000);
map_address(0x80004000, 0x80004000, MEM * 0x100000);
map_address(0x80000000, 0x80000000, 0x4000, K_CacheAttrC);
map_address(0x80004000, 0x80004000, MEM * 0x100000, K_CacheAttrC);
*/
}