MIPS:
* Add assembly optimised variants for memcpy, memset and find_first_set_bit * Add option to map_address in MMU to set caching algorithm git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19920 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
01bd736e00
commit
9b13a5d151
5 changed files with 446 additions and 11 deletions
|
@ -400,15 +400,13 @@ target/arm/crt0.S
|
|||
|
||||
#elif defined(CPU_MIPS)
|
||||
#undef mips
|
||||
/*target/mips/memcpy.S
|
||||
target/mips/memset.S
|
||||
common/memset16.c
|
||||
target/mips/strlen.S*/
|
||||
common/memcpy.c
|
||||
/*target/mips/strlen.S*/
|
||||
common/memmove.c
|
||||
common/memset.c
|
||||
common/memset16.c
|
||||
common/strlen.c
|
||||
target/mips/ffs-mips.S
|
||||
target/mips/memcpy-mips.S
|
||||
target/mips/memset-mips.S
|
||||
target/mips/mmu-mips.c
|
||||
#if CONFIG_CPU==JZ4732
|
||||
target/mips/ingenic_jz47xx/crt0.S
|
||||
|
|
54
firmware/target/mips/ffs-mips.S
Normal file
54
firmware/target/mips/ffs-mips.S
Normal file
|
@ -0,0 +1,54 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id$
|
||||
*
|
||||
* Copyright (C) 2009 by Maurus Cuelenaere
|
||||
* based on ffs-arm.S by Michael Sevakis
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include "config.h"
|
||||
#include "mips.h"
|
||||
|
||||
/****************************************************************************
|
||||
* int find_first_set_bit(uint32_t val);
|
||||
*
|
||||
* Find the index of the least significant set bit in the 32-bit word.
|
||||
*
|
||||
* return values:
|
||||
* 0 - bit 0 is set
|
||||
* 1 - bit 1 is set
|
||||
* ...
|
||||
* 31 - bit 31 is set
|
||||
* 32 - no bits set
|
||||
****************************************************************************/
|
||||
/*
 * int find_first_set_bit(uint32_t val)
 *
 * In:      a0 = val
 * Out:     v0 = index (0..31) of the least significant set bit,
 *               or 32 when val == 0
 * Scratch: t0 (a0 is left unmodified)
 *
 * clz counts zeros from bit 31 downwards, so applied to val directly it
 * locates the MOST significant set bit.  val & -val keeps only the lowest
 * set bit; for a word with a single bit set, 31 - clz is that bit's index.
 *
 * NOTE(review): no ".set noreorder" is visible in this file, so the
 * assembler presumably fills branch delay slots itself and the explicit
 * nops below are only padding -- confirm before removing them.  The
 * instruction following beqz is harmless if it does land in the delay slot.
 */
    .align  2
    .global find_first_set_bit
    .type   find_first_set_bit, %function

find_first_set_bit:
    beqz    a0, no_bits_set     /* val == 0: there is no set bit to report */

    negu    t0, a0              /* t0 = -val (two's complement)            */
    and     t0, a0, t0          /* t0 = val & -val: lowest set bit only    */
    clz     v0, t0              /* v0 = leading zeros above that bit       */
    li      t0, 31
    subu    v0, t0, v0          /* v0 = 31 - clz; subu cannot trap         */
    jr      ra
    nop

no_bits_set:
    li      v0, 32              /* documented "no bits set" result         */
    jr      ra
    nop
|
143
firmware/target/mips/memcpy-mips.S
Normal file
143
firmware/target/mips/memcpy-mips.S
Normal file
|
@ -0,0 +1,143 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id$
|
||||
*
|
||||
* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
|
||||
* This file was originally part of the GNU C Library
|
||||
* Contributed to glibc by Hartvig Ekner <hartvige@mips.com>, 2002
|
||||
* Adapted for Rockbox by Maurus Cuelenaere, 2009
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include "config.h"
|
||||
#include "mips.h"
|
||||
|
||||
/*
 * void *memcpy(void *s1, const void *s2, size_t n);
 *
 * In:      a0 = s1 (destination), a1 = s2 (source), a2 = n
 * Out:     v0 = s1 as passed in
 * Scratch: t0-t7, a3; a0-a2 are consumed
 *
 * Outline:
 *   n < 8                      -> plain byte loop
 *   (s1 ^ s2) & 3 == 0         -> one partial-word copy to reach a word
 *                                 boundary, then 32-byte blocks, then single
 *                                 words, then the byte loop for the rest
 *   otherwise                  -> bring the destination to a word boundary,
 *                                 then one unaligned load + aligned store
 *                                 per word, then the byte loop for the rest
 *
 * The whole routine is assembled with ".set noreorder": the instruction
 * after every branch is its delay slot and runs whether or not the branch
 * is taken.
 */

/*
 * Partial-word access pairs.  "HI" is the instruction used at the lower
 * address of an unaligned word (offset 0), "LO" the one used at offset 3.
 */
#ifndef ROCKBOX_BIG_ENDIAN
# define LWHI   lwr     /* little-endian: high part is right */
# define SWHI   swr
# define LWLO   lwl     /* little-endian: low part is left   */
# define SWLO   swl
#else
# define LWHI   lwl     /* big-endian: high part is left     */
# define SWHI   swl
# define LWLO   lwr     /* big-endian: low part is right     */
# define SWLO   swr
#endif

    .section .icode, "ax", %progbits

    .global memcpy
    .type   memcpy, %function

    .set    noreorder

memcpy:
    slti    t0, a2, 8                   /* t0 = (n < 8)                        */
    bnez    t0, .Lbytes
    move    v0, a0                      /* [slot] return value = s1, always    */

    xor     t0, a1, a0                  /* do s1 and s2 share the same         */
    andi    t0, t0, 0x3                 /*   offset within a word?             */
    bnez    t0, .Lmisaligned
    negu    t1, a1                      /* [slot] t1 = -s2                     */
    andi    t1, t1, 0x3                 /* t1 = bytes up to next word boundary */
    beqz    t1, .Lblocks
    subu    a2, a2, t1                  /* [slot] n -= t1 (t1 may be 0)        */
    LWHI    t0, 0(a1)                   /* copy the t1 leading bytes with one  */
    addu    a1, a1, t1                  /*   partial load / partial store      */
    SWHI    t0, 0(a0)
    addu    a0, a0, t1

.Lblocks:
    andi    t0, a2, 0x1f                /* t0 = n % 32                         */
    beq     t0, a2, .Lwords             /* n < 32: no full block               */
    subu    a3, a2, t0                  /* [slot] a3 = bytes in full blocks    */
    addu    a3, a3, a1                  /* a3 = source address where loop ends */
    move    a2, t0                      /* n = remainder after the blocks      */
.Lblock_loop:
    lw      t0, 0(a1)                   /* eight loads ...                     */
    lw      t1, 4(a1)
    lw      t2, 8(a1)
    lw      t3, 12(a1)
    lw      t4, 16(a1)
    lw      t5, 20(a1)
    lw      t6, 24(a1)
    lw      t7, 28(a1)
    addiu   a0, a0, 32                  /* pointers advance before the stores, */
    addiu   a1, a1, 32                  /*   hence the negative offsets below  */
    sw      t0, -32(a0)                 /* ... then eight stores               */
    sw      t1, -28(a0)
    sw      t2, -24(a0)
    sw      t3, -20(a0)
    sw      t4, -16(a0)
    sw      t5, -12(a0)
    sw      t6, -8(a0)
    bne     a1, a3, .Lblock_loop
    sw      t7, -4(a0)                  /* [slot] last store of the block      */

.Lwords:
    andi    t0, a2, 0x3                 /* t0 = n % 4                          */
    beq     t0, a2, .Lbytes             /* n < 4: no full word                 */
    subu    a3, a2, t0                  /* [slot] a3 = bytes in full words     */
    addu    a3, a3, a1                  /* a3 = source address where loop ends */
    move    a2, t0                      /* n = remainder after the words       */
.Lword_loop:
    lw      t0, 0(a1)
    addiu   a0, a0, 4
    addiu   a1, a1, 4
    bne     a1, a3, .Lword_loop
    sw      t0, -4(a0)                  /* [slot]                              */

.Lbytes:
    blez    a2, .Ldone                  /* nothing left                        */
    addu    a3, a2, a1                  /* [slot] a3 = source end address      */
.Lbyte_loop:
    lb      t0, 0(a1)
    addiu   a0, a0, 1
    addiu   a1, a1, 1
    bne     a1, a3, .Lbyte_loop
    sb      t0, -1(a0)                  /* [slot]                              */
.Ldone:
    jr      ra
    nop

.Lmisaligned:
    negu    a3, a0                      /* a3 = bytes until s1 is word aligned */
    andi    a3, a3, 0x3
    beqz    a3, .Lmis_words
    subu    a2, a2, a3                  /* [slot] n -= a3 (a3 may be 0)        */
    LWHI    t0, 0(a1)                   /* unaligned word load from s2 ...     */
    LWLO    t0, 3(a1)
    addu    a1, a1, a3
    SWHI    t0, 0(a0)                   /* ... partial store up to the         */
    addu    a0, a0, a3                  /*   destination word boundary         */
.Lmis_words:
    andi    t0, a2, 0x3                 /* t0 = n % 4, kept for the byte loop  */
    subu    a3, a2, t0
    addu    a3, a3, a1                  /* a3 = source address where loop ends */
.Lmis_loop:
    LWHI    t1, 0(a1)                   /* unaligned load, aligned store       */
    LWLO    t1, 3(a1)
    addiu   a0, a0, 4
    addiu   a1, a1, 4
    bne     a1, a3, .Lmis_loop
    sw      t1, -4(a0)                  /* [slot]                              */
    b       .Lbytes                     /* finish whatever is left bytewise    */
    move    a2, t0                      /* [slot] n = remainder                */

    .set    reorder
|
239
firmware/target/mips/memset-mips.S
Normal file
239
firmware/target/mips/memset-mips.S
Normal file
|
@ -0,0 +1,239 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id$
|
||||
*
|
||||
* This file was originally part of the Linux/MIPS GNU C Library
|
||||
* Copyright (C) 1998 by Ralf Baechle
|
||||
* Adapted for Rockbox by Maurus Cuelenaere, 2009
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include "config.h"
|
||||
#include "mips.h"
|
||||
|
||||
/*
 * Straight-line word-fill macros.
 *
 * FILLnn(dst, offset, val) expands to nn/4 consecutive "sw" instructions
 * that store register `val` at offset, offset+4, ... offset+nn-4 relative to
 * register `dst`.  FILL32 spells the stores out; the larger sizes are built
 * by chaining the next smaller macro, which expands to the same sequence of
 * stores in the same order.
 */
#define FILL32(dst, offset, val)                \
        sw      val, (offset + 0x00)(dst);      \
        sw      val, (offset + 0x04)(dst);      \
        sw      val, (offset + 0x08)(dst);      \
        sw      val, (offset + 0x0c)(dst);      \
        sw      val, (offset + 0x10)(dst);      \
        sw      val, (offset + 0x14)(dst);      \
        sw      val, (offset + 0x18)(dst);      \
        sw      val, (offset + 0x1c)(dst);

#define FILL64(dst, offset, val)                \
        FILL32(dst, offset, val)                \
        FILL32(dst, offset + 0x20, val)

#define FILL128(dst, offset, val)               \
        FILL64(dst, offset, val)                \
        FILL64(dst, offset + 0x40, val)

#define FILL256(dst, offset, val)               \
        FILL128(dst, offset, val)               \
        FILL128(dst, offset + 0x80, val)

/* Block size in bytes used by the fill loop, and the macro that emits it. */
#define FILL    32
#define F_FILL  FILL32
|
||||
|
||||
|
||||
/*
 * Partial-word stores.
 *   SWHI at address p writes the bytes from p up to the end of the word
 *        containing p   (used for an unaligned head).
 *   SWLO at address p writes the bytes from the start of the word
 *        containing p up to and including p   (used for the tail).
 */
#ifdef ROCKBOX_BIG_ENDIAN
# define SWHI   swl     /* high part is left in big-endian     */
# define SWLO   swr     /* low part is right in big-endian     */
#else
# define SWHI   swr     /* high part is right in little-endian */
# define SWLO   swl     /* low part is left in little-endian   */
#endif

/*
 * void *memset(void *s, int c, size_t n)
 *
 * In:      a0 = s, start of the area to fill
 *          a1 = c, fill value (only the low 8 bits are used)
 *          a2 = n, size of the area in bytes
 * Out:     v0 = s as passed in
 * Scratch: t0, t1; a0-a2 are consumed (a2 is 0 on return)
 *
 * Assembled with ".set noreorder" except around the block loop: the
 * instruction after each branch is its delay slot and always executes.
 */
    .section .icode, "ax", %progbits

    .global memset
    .type   memset, %function

    .set    noreorder
    .align  5
memset:
    beqz    a1, 1f                  /* c == 0: fill word is already 0        */
    move    v0, a0                  /* [slot] result                         */

    andi    a1, 0xff                /* replicate the byte into all four      */
    sll     t1, a1, 8               /*   byte lanes of a1                    */
    or      a1, t1
    sll     t1, a1, 16
    or      a1, t1
1:

    sltiu   t0, a2, 4               /* very small region?                    */
    bnez    t0, small_memset
    andi    t0, a0, 3               /* [slot] t0 = misalignment of s         */

    beqz    t0, 1f                  /* already word aligned                  */
    subu    t0, 4                   /* [slot] t0 = -(bytes to next boundary) */

    SWHI    a1, (a0)                /* fill the unaligned head in one store  */
    subu    a0, t0                  /* advance s to the word boundary        */
    addu    a2, t0                  /* n -= head bytes (t0 is negative)      */

1:  ori     t1, a2, (FILL-1)        /* t1 = n rounded down to a multiple     */
    xori    t1, (FILL-1)            /*   of FILL = bytes in full blocks      */
    beqz    t1, memset_partial      /* no full block to fill                 */
    andi    t0, a2, (FILL-4)        /* [slot] t0 = whole-word bytes left     */
                                    /*   over after the full blocks          */

    addu    t1, a0                  /* t1 = end address of the block loop    */
    .set    reorder
1:  addiu   a0, FILL
    F_FILL( a0, -FILL, a1 )
    bne     t1, a0, 1b
    .set    noreorder

memset_partial:
    /*
     * Computed jump into the middle of the F_FILL below: every "sw" is one
     * 4-byte instruction storing 4 bytes, so entering t0 bytes before label
     * 2 executes exactly the last t0/4 stores.  a0 is advanced by t0 in the
     * delay slot so that their negative offsets cover [old a0, old a0 + t0).
     */
    la      t1, 2f                  /* where to start                        */
    subu    t1, t0
    jr      t1
    addu    a0, t0                  /* [slot] dest ptr                       */

    F_FILL( a0, -FILL, a1 )         /* ... but first do words ...            */
2:  andi    a2, 3                   /* 0 <= n <= 3 to go                     */

    beqz    a2, 1f
    addu    a0, a2                  /* [slot] a0 = one past the last byte    */
    /*
     * a0 - n is word aligned here, so the n remaining bytes are the first
     * n bytes of one word.  SWLO writes from the start of that word through
     * a0 - 1 and nothing beyond it.  (SWHI at this address would instead
     * write from a0 - 1 to the END of the word, i.e. past the buffer.)
     */
    SWLO    a1, -1(a0)
1:  jr      ra
    move    a2, zero                /* [slot]                                */

small_memset:
    beqz    a2, 2f                  /* n == 0: nothing to do                 */
    addu    t1, a0, a2              /* [slot] t1 = end address               */

1:  addiu   a0, 1                   /* fill bytewise                         */
    bne     t1, a0, 1b
    sb      a1, -1(a0)              /* [slot]                                */

2:  jr      ra                      /* done                                  */
    move    a2, zero                /* [slot]                                */

    .set    reorder
|
|
@ -99,14 +99,15 @@ static void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1,
|
|||
restore_irq(old_irq);
|
||||
}
|
||||
|
||||
void map_address(unsigned long virtual, unsigned long physical, unsigned long length)
|
||||
void map_address(unsigned long virtual, unsigned long physical,
|
||||
unsigned long length, unsigned int cache_flags)
|
||||
{
|
||||
unsigned long entry0 = (physical & PFN_MASK) << PFN_SHIFT;
|
||||
unsigned long entry1 = ((physical+length) & PFN_MASK) << PFN_SHIFT;
|
||||
unsigned long entryhi = virtual & ~VPN2_SHIFT;
|
||||
|
||||
entry0 |= (M_EntryLoG | M_EntryLoV | (K_CacheAttrC << S_EntryLoC) );
|
||||
entry1 |= (M_EntryLoG | M_EntryLoV | (K_CacheAttrC << S_EntryLoC) );
|
||||
entry0 |= (M_EntryLoG | M_EntryLoV | (cache_flags << S_EntryLoC) );
|
||||
entry1 |= (M_EntryLoG | M_EntryLoV | (cache_flags << S_EntryLoC) );
|
||||
|
||||
add_wired_entry(entry0, entry1, entryhi, DEFAULT_PAGE_MASK);
|
||||
}
|
||||
|
@ -119,7 +120,7 @@ void tlb_init(void)
|
|||
|
||||
local_flush_tlb_all();
|
||||
/*
|
||||
map_address(0x80000000, 0x80000000, 0x4000);
|
||||
map_address(0x80004000, 0x80004000, MEM * 0x100000);
|
||||
map_address(0x80000000, 0x80000000, 0x4000, K_CacheAttrC);
|
||||
map_address(0x80004000, 0x80004000, MEM * 0x100000, K_CacheAttrC);
|
||||
*/
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue