patch #785885 by Magnus Holmgren: way faster rolo, with assembler

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@3971 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jörg Hohensohn 2003-10-12 16:40:45 +00:00
parent 1f30fa7db0
commit cbbbcd1800
5 changed files with 147 additions and 51 deletions

View file

@ -30,7 +30,7 @@ endif
SRC := $(wildcard drivers/*.c common/*.c *.c)
OBJS := $(SRC:%.c=$(OBJDIR)/%.o) $(OBJDIR)/crt0.o $(OBJDIR)/bitswap.o
OBJS := $(SRC:%.c=$(OBJDIR)/%.o) $(OBJDIR)/crt0.o $(OBJDIR)/bitswap.o $(OBJDIR)/descramble.o
DEPS:=.deps
DEPDIRS:=$(DEPS) $(DEPS)/drivers $(DEPS)/common $(DEPS)/malloc

View file

@ -55,8 +55,6 @@ SECTIONS
/* We put the copy of the .iram section here to save space */
_iramcopy = .;
. += 0x2000;
_topramcopy = .;
. += 0x300;
_stackend = .;
} > DRAM
@ -82,17 +80,11 @@ SECTIONS
_mp3buffer = .;
} > DRAM
.mp3end ENDADDR - 0x300:
.mp3end ENDADDR:
{
_mp3end = .;
} > DRAM
.topram : AT ( _topramcopy ) {
_topramstart = .;
*(.topcode)
_topramend = .;
} > DRAM
.plugin ENDADDR:
{
_pluginbuf = .;

View file

@ -112,20 +112,6 @@ copy_l:
bf copy_l
nop
/* copy the .topram section */
mov.l topramcopy_k,r0
mov.l topram_k,r1
mov.l topramend_k,r2
copy_l2:
mov.l @r0,r3
mov.l r3,@r1
add #4,r0
add #4,r1
cmp/ge r2,r1
bf copy_l2
nop
/* Munge the main thread stack */
mov.l stack_k,r2
mov.l deadbeef_k,r0
@ -164,12 +150,6 @@ iram_k:
.long _iramstart
iramend_k:
.long _iramend
topramcopy_k:
.long _topramcopy
topram_k:
.long _topramstart
topramend_k:
.long _topramend
main_k:
.long _main
vbr_k:

132
firmware/descramble.S Normal file
View file

@ -0,0 +1,132 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2003 by Magnus Holmgren
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
.section .icode,"ax",@progbits
.align 4
.global _descramble
.type _descramble,@function
/* Descramble a block of byte data, from source to dest, processing len
 * bytes. Size only limited by the len argument. Note that len must
 * be a multiple of 4 (something rolo_load() already assumes).
 * (Does the Archos firmware loader also require that?)
 *
 * The scrambled data is read as four interleaved streams of len / 4 bytes
 * each: output byte i is taken from source[(i % 4) * (len / 4) + i / 4].
 * Every byte is rotated right by one bit and then inverted before it is
 * stored to dest[i].
 *
 * A len of 0 is handled explicitly: nothing is read or written and the
 * returned checksum is 0.
 *
 * Returns the 16-bit "sum" checksum of the descrambled data.
 *
 * Arguments:
 *   r4 - source (unsigned char*)
 *   r5 - dest (unsigned char*)
 *   r6 - len (unsigned int)
 */
/* Register usage:
 *   i        - r0
 *   i4       - r1
 *   checksum - r2
 *   addr     - r3
 *   source   - r4
 *   dest     - r5
 *   len      - r6
 *   len4     - r7
 *   data     - r8   (saved/restored on the stack)
 *   temp     - r9   (saved/restored on the stack)
 */
_descramble:
    mov.l   r8,@-r15
    mov.l   r9,@-r15
    mov     #0,r2           /* checksum = 0 */
    tst     r6,r6           /* len == 0? */
    bt      .desc_done      /* yes: touch no memory, return checksum 0 */
    mov     #0,r0           /* i = 0 */
    mov     #0,r1           /* i4 = i / 4 */
    mov     r4,r3           /* addr = source */
    mov     r6,r7
    shlr2   r7              /* len4 = len / 4 */
.loop:
    mov.b   @r3,r8          /* data = *addr */
    add     r7,r3           /* addr += len4 (step to the next stream) */
    extu.b  r8,r8           /* we want the data extended unsigned */
    shlr    r8              /* start rotate right of low byte */
    movt    r9              /* get bit 0 that was shifted out */
    shll8   r9
    shlr    r9              /* move it to bit 7 */
    or      r9,r8           /* finish rotate right */
    not     r8,r8
    extu.b  r8,r8           /* drop the inverted upper 24 bits */
    mov.b   r8,@(r0,r5)     /* dest[i] = data */
    add     r8,r2           /* checksum += dest[i] */
    add     #1,r0           /* i++ */
    tst     #3,r0           /* reset addr? (T = 1 when i % 4 == 0) */
    bf      .loop           /* no delay slot: next insn runs only on fall-through */
    add     #1,r1           /* i4++ */
    mov     r4,r3
    add     r1,r3           /* addr = source + i4 */
    cmp/hs  r6,r0           /* all done? (unsigned i >= len) */
    bf      .loop
    /* 17 cycles if no "reset," 22 if reset => average 18.25 cycles per
     * byte, assuming no wait states from reads or writes. "Old" algorithm
     * needed 24-26 cycles per byte, under the same assumptions.
     */
.desc_done:
    mov.l   @r15+,r9
    mov.l   @r15+,r8
    rts
    extu.w  r2,r0           /* delay slot: return checksum truncated to 16 bits */
/* Move len bytes from source to dest (which must be suitably aligned for
 * long moves) and jump to dest + 0x200. Control does not come back here.
 *
 * The copy is done with 32-bit moves and transfers len / 4 + 1 longwords,
 * so up to four bytes past len are read from source and written to dest.
 * source is read with mov.l as well, so it presumably needs the same
 * alignment as dest.
 *
 * Arguments:
 *   r4 - source
 *   r5 - dest
 *   r6 - len
 */
    .global _rolo_restart
    .type   _rolo_restart,@function
_rolo_restart:
    mov.w   .offset,r0      /* r0 = 0x200, loaded PC-relative from below */
    mov     r5,r7
    add     r0,r7           /* r7 = dest + 0x200: start_func() */
    mov     r6,r0
    shlr2   r0
    add     #1,r0           /* r0 = len / 4 + 1 longwords left to copy */
.copy:
    mov.l   @r4+,r1         /* fetch next longword, post-increment source */
    add     #-1,r0
    mov.l   r1,@r5
    add     #4,r5
    cmp/eq  #0,r0           /* counter exhausted? */
    bf      .copy
    jmp     @r7             /* enter the freshly copied image */
    nop                     /* delay slot */
.offset:
    .word   0x200
.end:
    /* Give each function its own symbol size. _descramble ends where
     * _rolo_restart begins; nothing is emitted between the two.
     */
    .size   _descramble,_rolo_restart-_descramble
    .size   _rolo_restart,.end-_rolo_restart

View file

@ -89,20 +89,24 @@ static void rolo_error(char *text)
button_get(true);
lcd_stop_scroll();
}
/* these are in assembler file "descramble.S" */
extern unsigned short descramble(unsigned char* source, unsigned char* dest, int length);
extern void rolo_restart(unsigned char* source, unsigned char* dest, int length);
/***************************************************************************
*
* Name: rolo_load_app(char *filename,int scrambled)
* Filename must be a fully defined filename including the path and extension
*
***************************************************************************/
int rolo_load(char* filename) __attribute__ ((section (".topcode")));
int rolo_load(char* filename)
{
int fd,slen;
unsigned long length,file_length,i;
int fd;
unsigned long length;
unsigned long file_length;
unsigned short checksum,file_checksum;
unsigned char* ramstart = (void*)0x09000000;
void (*start_func)(void) = (void*)ramstart + 0x200;
bool restore_io; /* debug value */
lcd_clear_display();
@ -152,19 +156,9 @@ int rolo_load(char* filename)
lcd_puts(0, 1, "Descramble");
lcd_update();
/* descramble */
slen = length/4;
for (i = 0; i < length; i++) {
unsigned long addr = ((i % slen) << 2) + i/slen;
unsigned char data = mp3buf[i+length];
data = ~((data >> 1) | ((data << 7) & 0x80)); /* poor man's ROR */
mp3buf[addr] = data;
}
checksum = descramble(mp3buf + length, mp3buf, length);
/* Compute checksum and verify against checksum from file header */
checksum=0;
for (i=0; i<length; i++)
checksum += mp3buf[i];
/* Verify checksum against file header */
if (checksum != file_checksum) {
rolo_error("Checksum Error");
@ -185,16 +179,14 @@ int rolo_load(char* filename)
system_init(); /* Initialize system for restart */
i2c_init(); /* Init i2c bus - it seems like a good idea */
ICR = IRQ0_EDGE_TRIGGER; /* Make IRQ0 edge triggered */
#ifndef ARCHOS_PLAYER /* player is to be checked later */
PAIOR = 0x0FA0; /* needed when flashed, probably model-specific */
#endif
if (restore_io) /* test code */
rolo_io_restore(); /* restore the I/Os from the file content */
/* move firmware to start of ram */
for ( i=0; i < length/4+1; i++ )
((unsigned int*)ramstart)[i] = ((unsigned int*)mp3buf)[i];
start_func(); /* start new firmware */
rolo_restart(mp3buf, ramstart, length);
return 0; /* this is never reached */
}