From cbbbcd1800c91319c98da6d1d98a488fdd79f144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Hohensohn?= Date: Sun, 12 Oct 2003 16:40:45 +0000 Subject: [PATCH] patch #785885 by Magnus Holmgren: way faster rolo, with assembler git-svn-id: svn://svn.rockbox.org/rockbox/trunk@3971 a1c6a512-1295-4272-9138-f99709370657 --- firmware/Makefile | 2 +- firmware/app.lds | 10 +--- firmware/crt0.S | 20 ------- firmware/descramble.S | 132 ++++++++++++++++++++++++++++++++++++++++++ firmware/rolo.c | 34 +++++------ 5 files changed, 147 insertions(+), 51 deletions(-) create mode 100644 firmware/descramble.S diff --git a/firmware/Makefile b/firmware/Makefile index 916a1a4094..93ee38ac78 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -30,7 +30,7 @@ endif SRC := $(wildcard drivers/*.c common/*.c *.c) -OBJS := $(SRC:%.c=$(OBJDIR)/%.o) $(OBJDIR)/crt0.o $(OBJDIR)/bitswap.o +OBJS := $(SRC:%.c=$(OBJDIR)/%.o) $(OBJDIR)/crt0.o $(OBJDIR)/bitswap.o $(OBJDIR)/descramble.o DEPS:=.deps DEPDIRS:=$(DEPS) $(DEPS)/drivers $(DEPS)/common $(DEPS)/malloc diff --git a/firmware/app.lds b/firmware/app.lds index 34ab1adf66..9fef2ec46d 100644 --- a/firmware/app.lds +++ b/firmware/app.lds @@ -55,8 +55,6 @@ SECTIONS /* We put the copy of the .iram section here to save space */ _iramcopy = .; . += 0x2000; - _topramcopy = .; - . 
+= 0x300; _stackend = .; } > DRAM @@ -82,17 +80,11 @@ SECTIONS _mp3buffer = .; } > DRAM - .mp3end ENDADDR - 0x300: + .mp3end ENDADDR: { _mp3end = .; } > DRAM - .topram : AT ( _topramcopy ) { - _topramstart = .; - *(.topcode) - _topramend = .; - } > DRAM - .plugin ENDADDR: { _pluginbuf = .; diff --git a/firmware/crt0.S b/firmware/crt0.S index 0343fd1e23..99aab83867 100644 --- a/firmware/crt0.S +++ b/firmware/crt0.S @@ -112,20 +112,6 @@ copy_l: bf copy_l nop - /* copy the .topram section */ - mov.l topramcopy_k,r0 - mov.l topram_k,r1 - mov.l topramend_k,r2 -copy_l2: - mov.l @r0,r3 - mov.l r3,@r1 - add #4,r0 - add #4,r1 - cmp/ge r2,r1 - bf copy_l2 - nop - - /* Munge the main thread stack */ mov.l stack_k,r2 mov.l deadbeef_k,r0 @@ -164,12 +150,6 @@ iram_k: .long _iramstart iramend_k: .long _iramend -topramcopy_k: - .long _topramcopy -topram_k: - .long _topramstart -topramend_k: - .long _topramend main_k: .long _main vbr_k: diff --git a/firmware/descramble.S b/firmware/descramble.S new file mode 100644 index 0000000000..ccde5d0e0a --- /dev/null +++ b/firmware/descramble.S @@ -0,0 +1,132 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2003 by Magnus Holmgren + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + + .section .icode,"ax",@progbits + + .align 4 + .global _descramble + .type _descramble,@function + +/* Descramble a block of byte data, from source to dest, processing len + * bytes. Size only limited by the len argument. Note that len must + * be a multiple of 4 (something rolo_load() already assumes; + * does the Archos firmware loader also require that?) and non-zero: len is only tested after each group of 4 bytes. + * + * Returns the 16-bit "sum" checksum of the descrambled data. + * + * Arguments: + * r4 - source (unsigned char*) + * r5 - dest (unsigned char*) + * r6 - len (unsigned int) + */ + +/* Register usage: + * i - r0 + * i4 - r1 + * checksum - r2 + * addr - r3 + * source - r4 + * dest - r5 + * len - r6 + * len4 - r7 + * data - r8 + * temp - r9 + */ + +_descramble: + mov.l r8,@-r15 /* save r8 and r9 on the stack; restored before rts */ + mov.l r9,@-r15 + mov #0,r0 /* i = 0 */ + mov #0,r1 /* i4 = 0 (tracks i / 4) */ + mov #0,r2 /* checksum = 0 */ + mov r4,r3 /* addr = source */ + mov r6,r7 + shlr2 r7 /* len4 = len / 4 */ + +.loop: + mov.b @r3,r8 /* data = *addr */ + add r7,r3 /* addr += len4 */ + extu.b r8,r8 /* we want the data extended unsigned */ + shlr r8 /* start rotate right of low byte */ + movt r9 /* get bit 0 that was shifted out */ + shll8 r9 + shlr r9 /* move it to bit 7 */ + or r9,r8 /* finish rotate right */ + not r8,r8 /* invert all bits */ + extu.b r8,r8 /* keep only the low byte, so the checksum adds a byte value */ + mov.b r8,@(r0,r5) /* dest[i] = data */ + add r8,r2 /* checksum += data */ + add #1,r0 /* i++ */ + tst #3,r0 /* reset addr? (true when i is a multiple of 4) */ + bf .loop + + add #1,r1 /* i4++ */ + mov r4,r3 + add r1,r3 /* addr = source + i4 */ + cmp/hs r6,r0 /* all done? (unsigned i >= len) */ + bf .loop + + /* 17 cycles if no "reset," 22 if reset => average 18.25 cycles per + * byte, assuming no wait states from reads or writes. "Old" algorithm + * needed 24-26 cycles per byte, under the same assumptions. 
+ */ + + mov.l @r15+,r9 + mov.l @r15+,r8 + rts + extu.w r2,r0 /* delay slot of rts: r0 = checksum truncated to 16 bits */ + + + +/* Copy (len / 4) + 1 longwords, i.e. 1 to 4 bytes more than len, from source to dest (which must be suitably aligned for + * long moves) and jump to dest + 0x200. + * + * Arguments: + * r4 - source + * r5 - dest + * r6 - len + */ + + .global _rolo_restart + .type _rolo_restart,@function + +_rolo_restart: + mov.w .offset,r0 /* r0 = 0x200, loaded from the literal below */ + mov r5,r7 + add r0,r7 /* r7 = dest + 0x200, the address jumped to after the copy */ + mov r6,r0 + shlr2 r0 + add #1,r0 /* r0 = len / 4 + 1 longwords left to copy */ +.copy: + mov.l @r4+,r1 + add #-1,r0 + mov.l r1,@r5 + add #4,r5 + cmp/eq #0,r0 + bf .copy + + jmp @r7 + + nop /* delay slot of jmp */ + +.offset: + .word 0x200 + +.end: /* NOTE(review): this label follows _rolo_restart, so the .size below spans both functions and _rolo_restart gets no .size of its own */ + .size _descramble,.end-_descramble diff --git a/firmware/rolo.c b/firmware/rolo.c index 80dab75eea..e205c288f7 100644 --- a/firmware/rolo.c +++ b/firmware/rolo.c @@ -89,20 +89,24 @@ static void rolo_error(char *text) button_get(true); lcd_stop_scroll(); } + +/* these are in assembler file "descramble.S" */ +extern unsigned short descramble(unsigned char* source, unsigned char* dest, int length); +extern void rolo_restart(unsigned char* source, unsigned char* dest, int length); + /*************************************************************************** * * Name: rolo_load_app(char *filename,int scrambled) * Filename must be a fully defined filename including the path and extension * ***************************************************************************/ -int rolo_load(char* filename) __attribute__ ((section (".topcode"))); int rolo_load(char* filename) { - int fd,slen; - unsigned long length,file_length,i; + int fd; + unsigned long length; + unsigned long file_length; unsigned short checksum,file_checksum; unsigned char* ramstart = (void*)0x09000000; - void (*start_func)(void) = (void*)ramstart + 0x200; bool restore_io; /* debug value */ lcd_clear_display(); @@ -152,19 +156,9 @@ int rolo_load(char* filename) lcd_puts(0, 1, "Descramble"); lcd_update(); - /* descramble */ - slen = length/4; - for (i = 0; i < length; i++) { - unsigned long addr = ((i % slen) << 2) + i/slen; - unsigned char data = 
mp3buf[i+length]; - data = ~((data >> 1) | ((data << 7) & 0x80)); /* poor man's ROR */ - mp3buf[addr] = data; - } + checksum = descramble(mp3buf + length, mp3buf, length); - /* Compute checksum and verify against checksum from file header */ - checksum=0; - for (i=0; i