Self-extractor for on-disk firmware image: UCL decompressor in SH1 assembler - less than half the size of the compiled C function, and ~45% faster.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18904 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
afd2f681d1
commit
2c52dee83f
3 changed files with 164 additions and 72 deletions
|
@ -14,7 +14,8 @@ PRINTS=$(SILENT)$(call info,$(1))
|
|||
|
||||
LDS := link.lds
|
||||
LINKFILE = $(OBJDIR)/linkage.lds
|
||||
OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o $(OBJDIR)/startup.o
|
||||
OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o \
|
||||
$(OBJDIR)/sh_nrv2e_d8.o $(OBJDIR)/startup.o
|
||||
CFLAGS = $(GCCOPTS)
|
||||
|
||||
all: $(OBJDIR)/compressed.bin
|
||||
|
@ -25,9 +26,6 @@ $(OBJDIR)/compressed.bin : $(OBJDIR)/compressed.elf
|
|||
$(OBJDIR)/compressed.elf : $(OBJS) $(LINKFILE)
|
||||
$(call PRINTS,LD $(@F))$(CC) $(GCCOPTS) -Os -nostdlib -o $@ $(OBJS) -T$(LINKFILE) -Wl,-Map,$(OBJDIR)/compressed.map
|
||||
|
||||
$(LDS): $(OBJS)
|
||||
|
||||
|
||||
$(LINKFILE): $(LDS)
|
||||
$(call PRINTS,Build LDS file)cat $< | $(CC) -DMEMORYSIZE=$(MEMORYSIZE) $(INCLUDES) $(TARGET) $(DEFINES) -E -P $(ROMBUILD) - >$@
|
||||
|
||||
|
@ -39,11 +37,15 @@ $(OBJDIR)/startup.o : startup.S
|
|||
$(SILENT)mkdir -p $(dir $@)
|
||||
$(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
$(OBJDIR)/sh_nrv2e_d8.o : sh_nrv2e_d8.S
|
||||
$(SILENT)mkdir -p $(dir $@)
|
||||
$(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
$(OBJDIR)/uclimage.o : $(OBJDIR)/uclimage.c
|
||||
$(SILENT)mkdir -p $(dir $@)
|
||||
$(call PRINTS,CC $(<F))$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
$(OBJDIR)/uclimage.c : $(FLASHFILE) $(TOOLSDIR)/ucl2src.pl
|
||||
$(SILENT)mkdir -p $(dir $@)
|
||||
$(call PRINTS,UCL2SRC)perl -s $(TOOLSDIR)/ucl2src.pl -p=$(OBJDIR)/uclimage $<
|
||||
$(call PRINTS,UCL2SRC $(<F))perl -s $(TOOLSDIR)/ucl2src.pl -p=$(OBJDIR)/uclimage $<
|
||||
|
||||
|
|
|
@ -36,8 +36,8 @@ extern char loadaddress[], dramend[];
|
|||
extern void start(void);
|
||||
|
||||
void main(void) ICODE_ATTR;
|
||||
static int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst,
|
||||
unsigned long *dst_len) ICODE_ATTR;
|
||||
int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst,
|
||||
unsigned long *dst_len) ICODE_ATTR;
|
||||
|
||||
/* Vector table */
|
||||
void (*vbr[]) (void) __attribute__ ((section (".vectors"))) =
|
||||
|
@ -50,71 +50,6 @@ void (*vbr[]) (void) __attribute__ ((section (".vectors"))) =
|
|||
|
||||
/** All subsequent functions are executed from IRAM **/
|
||||
|
||||
/* Thinned-out version of the UCL NRV2E decompression source code
|
||||
* Original (C) Markus F.X.J Oberhumer under GNU GPL license */
|
||||
/* GETBIT(bb, src, ilen): fetch the next control bit of the compressed stream.
 * bb is the bit buffer. While any of its low 7 bits is set it is simply
 * doubled; otherwise a new byte is read from src[ilen++] and stored as
 * byte*2+1, the appended 1 marking where that byte's bits end. The value of
 * the macro is bit 8 of the updated bb.
 * Side effects: assigns bb and may increment ilen. The arguments are
 * evaluated more than once, so only plain variables should be passed. */
#define GETBIT(bb, src, ilen) \
|
||||
(((bb = bb & 0x7f ? bb*2 : ((unsigned)src[ilen++]*2+1)) >> 8) & 1)
|
||||
|
||||
/* Decompress the stream at src into dst.
 *
 * src     - compressed input; read sequentially, no length limit is applied
 *           in this function
 * dst     - output buffer; written sequentially, no capacity check is made
 *           in this function
 * dst_len - receives the number of bytes written to dst
 *
 * Returns the number of bytes consumed from src.
 * The outer loop only ends when the end-of-stream marker (an assembled
 * offset equal to 0xffffffff) is read. */
static int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst,
|
||||
unsigned long *dst_len)
|
||||
{
|
||||
/* bb == 0 has no low bits set, so the first GETBIT loads a byte. */
unsigned long bb = 0;
|
||||
unsigned ilen = 0, olen = 0, last_m_off = 1;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
unsigned m_off, m_len;
|
||||
|
||||
/* Each 1 bit is followed by one literal byte copied verbatim. */
while (GETBIT(bb,src,ilen))
|
||||
dst[olen++] = src[ilen++];
|
||||
|
||||
/* Variable-length offset prefix: one bit is shifted in, then a 1 bit
 * ends the prefix; otherwise a further bit is merged in and the loop
 * repeats. */
m_off = 1;
|
||||
for (;;)
|
||||
{
|
||||
m_off = m_off*2 + GETBIT(bb,src,ilen);
|
||||
if (GETBIT(bb,src,ilen))
|
||||
break;
|
||||
m_off = (m_off-1)*2 + GETBIT(bb,src,ilen);
|
||||
}
|
||||
/* A prefix of exactly 2 means "reuse the previous match offset". */
if (m_off == 2)
|
||||
{
|
||||
m_off = last_m_off;
|
||||
m_len = GETBIT(bb,src,ilen);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Combine the prefix with the next source byte. */
m_off = (m_off-3)*256 + src[ilen++];
|
||||
/* All bits set is the end-of-stream marker. */
if (m_off == 0xffffffff)
|
||||
break;
|
||||
/* The inverted low bit is the first length flag; the remaining bits,
 * plus one, form the offset, which is remembered for later reuse. */
m_len = (m_off ^ 0xffffffff) & 1;
|
||||
m_off >>= 1;
|
||||
last_m_off = ++m_off;
|
||||
}
|
||||
/* Length decoding: flag set -> 1 or 2; else one more flag bit selects
 * either 3 or 4, or an open-ended bit sequence plus 3. */
if (m_len)
|
||||
m_len = 1 + GETBIT(bb,src,ilen);
|
||||
else if (GETBIT(bb,src,ilen))
|
||||
m_len = 3 + GETBIT(bb,src,ilen);
|
||||
else
|
||||
{
|
||||
/* m_len is 0 on entry to this branch, so this makes it 1. */
m_len++;
|
||||
do {
|
||||
m_len = m_len*2 + GETBIT(bb,src,ilen);
|
||||
} while (!GETBIT(bb,src,ilen));
|
||||
m_len += 3;
|
||||
}
|
||||
/* Matches further back than 0x500 bytes are one byte longer. */
m_len += (m_off > 0x500);
|
||||
{
|
||||
const unsigned char *m_pos;
|
||||
/* Copy m_len + 1 bytes from m_off bytes back in the output. The copy
 * is byte by byte, so source and destination may overlap. */
m_pos = dst + olen - m_off;
|
||||
dst[olen++] = *m_pos++;
|
||||
do dst[olen++] = *m_pos++; while (--m_len > 0);
|
||||
}
|
||||
}
|
||||
*dst_len = olen;
|
||||
|
||||
return ilen;
|
||||
}
|
||||
|
||||
#define ALIGNED_IMG_SIZE ((sizeof(image) + 3) & ~3)
|
||||
/* This will never return */
|
||||
void main(void)
|
||||
|
|
155
firmware/decompressor/sh_nrv2e_d8.S
Normal file
155
firmware/decompressor/sh_nrv2e_d8.S
Normal file
|
@ -0,0 +1,155 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id$
|
||||
*
|
||||
* Copyright (C) 2008 by Jens Arnold
|
||||
*
|
||||
* based on arm_nrv2e_d8.S -- ARM decompressor for NRV2E
|
||||
* Copyright (C) 1996-2008 Markus Franz Xaver Johannes Oberhumer
|
||||
* Copyright (C) 1996-2008 Laszlo Molnar
|
||||
* Copyright (C) 2000-2008 John F. Reiser
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* Register aliases used throughout the decompressor.
 * r4, r5 and r6 are pushed on entry and popped again at eof_n2e as the
 * original src, the original dst and the dst_len pointer; r4 and r5 then
 * serve as the running read and write pointers, and r6 is reused as the
 * len/cnt scratch register. */
#define src r4
|
||||
#define dst r5
|
||||
#define len r6 /* overlaps 'cnt' */
|
||||
#define cnt r6 /* overlaps 'len' while reading an offset */
|
||||
#define tmp r7
|
||||
|
||||
/* off is used as the index in "mov.b @(off,dst), tmp" in the copy loop.
 * bits is the bit buffer and bitmask the constant tested against it in
 * GETBIT. wrnk is loaded with -5 and shifted left by 8 at function entry. */
#define off r0 /* must be r0 because of indexed addressing */
|
||||
#define bits r1
|
||||
#define bitmask r2
|
||||
#define wrnk r3 /* -0x500 -M2_MAX_OFFSET before "wrinkle" */
|
||||
|
||||
|
||||
/* GETBIT: shift the next stream bit out of 'bits' into the T flag.
 * 'tst bits, bitmask' checks whether any bit below the MSB is still set
 * (bitmask is 0x7fffffff). If so, 'bf 1f' skips straight to the shift.
 * Otherwise get1_n2e is called to reload 'bits' from the source.
 * The single 'shll bits' serves both paths: it is the branch target 1: and
 * also the instruction following the bsr, i.e. its delay slot (see the
 * comment on that line). get1_n2e expects T to be set on entry.
 * Uses the local label 1, so it can be expanded many times. */
#define GETBIT \
|
||||
tst bits, bitmask; \
|
||||
bf 1f; \
|
||||
bsr get1_n2e; \
|
||||
1: \
|
||||
shll bits /* using the delay slot on purpose */
|
||||
|
||||
/* getnextb(reg): fetch a bit and rotate it into reg with rotcl.
 * jnextb0 / jnextb1: fetch a bit, then 'bf' / 'bt'; the branch target is
 * the text that follows the macro at the point of use, e.g.
 * "jnextb1 lit_n2e". */
#define getnextb(reg) GETBIT; rotcl reg
|
||||
#define jnextb0 GETBIT; bf
|
||||
#define jnextb1 GETBIT; bt
|
||||
|
||||
.section .icode,"ax",@progbits
|
||||
.align 2
|
||||
.global _ucl_nrv2e_decompress_8
|
||||
.type _ucl_nrv2e_decompress_8,@function
|
||||
|
||||
/* src_len = ucl_nrv2e_decompress_8(const unsigned char *src,
|
||||
* unsigned char *dst,
|
||||
* unsigned long *dst_len)
|
||||
*/
|
||||
|
||||
/* Entry point. Saves pr and the three argument registers (r6, r5, r4) on
 * the stack so eof_n2e can compute the consumed and produced byte counts.
 * Interleaved with the pushes, the decoder state is initialised:
 *   off     = -1
 *   wrnk    = -5 << 8
 *   bitmask = 0xffffffff >> 1
 *   bits    = ~bitmask, written by the instruction placed after the bra
 *             (NOTE(review): this is the bra delay slot on SH, so it runs
 *             before top_n2e is reached). */
_ucl_nrv2e_decompress_8:
|
||||
sts.l pr, @-r15
|
||||
mov #-1, off ! off = -1 initial condition
|
||||
mov.l r6, @-r15
|
||||
mov #-5, wrnk
|
||||
mov.l r5, @-r15
|
||||
shll8 wrnk ! nrv2e -M2_MAX_OFFSET
|
||||
mov.l r4, @-r15
|
||||
mov #-1, bitmask
|
||||
shlr bitmask ! 0x7fffffff for testing before shifting
|
||||
bra top_n2e
|
||||
not bitmask, bits ! refill next time (MSB must be set)
|
||||
|
||||
/* End of stream. Pops the values saved at entry, in reverse push order.
 * Stores (dst - orig_dst) through the dst_len pointer and returns
 * (src - orig_src) in r0; the final mov sits after the rts.
 * r0-r2 are reused here as plain scratch registers. */
eof_n2e:
|
||||
mov.l @r15+, r0 ! r0 = orig_src
|
||||
mov.l @r15+, r1 ! r1 = orig_dst
|
||||
sub r0, src
|
||||
mov.l @r15+, r2 ! r2 = plen_dst
|
||||
sub r1, dst
|
||||
mov.l dst, @r2
|
||||
lds.l @r15+, pr
|
||||
rts
|
||||
mov src, r0
|
||||
|
||||
.align 2
|
||||
/* Bit buffer refill, called from GETBIT. Loads the next source byte into
 * 'bits', rotates T in at the bottom, then moves the result up by 24 bits
 * (shll16, plus shll8 placed after the rts).
 * NOTE(review): relies on shll16/shll8 leaving T untouched so that the T
 * produced by rotcl survives to the caller - confirm against the SH manual. */
get1_n2e: ! in: T bit set
|
||||
mov.b @src+, bits ! SH1 sign-extends on load
|
||||
rotcl bits ! LSB = T, T = MSB
|
||||
shll16 bits
|
||||
rts
|
||||
shll8 bits
|
||||
|
||||
.align 2
|
||||
/* Literal: copy one byte from src to dst and advance both pointers. */
lit_n2e:
|
||||
mov.b @src, tmp
|
||||
add #1, src ! Need to fill the pipeline latency anyway
|
||||
mov.b tmp, @dst
|
||||
add #1, dst
|
||||
/* Main loop head: a 1 bit means another literal follows; otherwise start
 * decoding a match offset with cnt = 1 (set by the mov after the bra). */
top_n2e:
|
||||
jnextb1 lit_n2e
|
||||
bra getoff_n2e
|
||||
mov #1, cnt
|
||||
|
||||
/* Offset prefix loop. Each pass through getoff_n2e rotates one bit into
 * cnt, then reads a continuation bit: 0 loops back through off_n2e, which
 * decrements cnt and rotates in one extra bit first. */
off_n2e:
|
||||
add #-1, cnt
|
||||
getnextb(cnt)
|
||||
getoff_n2e:
|
||||
getnextb(cnt)
|
||||
jnextb0 off_n2e
|
||||
|
||||
/* tmp = cnt - 3. A negative result means "reuse the previous offset":
 * off is left unchanged. Otherwise off = ~((tmp << 8) | next source byte).
 * A result of zero is the end-of-stream marker. The shar then drops the low
 * bit into T, which selects the short length path (lenlast_n2e) directly. */
mov cnt, tmp
|
||||
mov #0, len ! cnt and len share a reg!
|
||||
add #-3, tmp
|
||||
cmp/pz tmp
|
||||
bf offprev_n2e ! cnt was 2
|
||||
mov.b @src+, off ! low 7+1 bits
|
||||
shll8 tmp
|
||||
extu.b off, off
|
||||
or tmp, off
|
||||
not off, off ! off = ~off
|
||||
tst off, off
|
||||
bt eof_n2e
|
||||
shar off
|
||||
bt lenlast_n2e
|
||||
bra lenmore_n2e
|
||||
mov #1, len
|
||||
|
||||
/* Previous-offset case: the next bit takes the place of the flag that the
 * shar produces on the new-offset path. */
offprev_n2e:
|
||||
jnextb1 lenlast_n2e
|
||||
mov #1, len
|
||||
/* len is 1 here. A 1 bit finishes via lenlast_n2e; otherwise bits are
 * rotated into len until a 1 terminator bit is read, then 4 is added. */
lenmore_n2e:
|
||||
jnextb1 lenlast_n2e
|
||||
len_n2e:
|
||||
getnextb(len)
|
||||
jnextb0 len_n2e
|
||||
bra gotlen_n2e
|
||||
add #6-2, len
|
||||
|
||||
/* Short lengths: rotate one final bit into len and add 2. */
lenlast_n2e:
|
||||
getnextb(len) ! 0,1,2,3
|
||||
add #2, len
|
||||
/* Add the result of comparing off against wrnk (T, fetched with movt) to
 * len, i.e. one extra byte for far matches. */
gotlen_n2e:
|
||||
cmp/gt off, wrnk
|
||||
movt tmp ! too far away, so minimum match length is 3
|
||||
add tmp, len
|
||||
/* Copy len bytes from dst + off to dst, one byte at a time, decrementing
 * len until it reaches zero; then decode the next item. */
copy_n2e:
|
||||
add #-1, len
|
||||
mov.b @(off,dst), tmp
|
||||
tst len, len
|
||||
mov.b tmp, @dst
|
||||
add #1, dst
|
||||
bf copy_n2e
|
||||
bra top_n2e
|
||||
nop
|
||||
|
||||
/* Record the function's size on its ELF symbol. The name must be the
 * underscore-prefixed label that .global and .type refer to; without the
 * underscore the directive names a symbol that is never defined. */
.size _ucl_nrv2e_decompress_8, .-_ucl_nrv2e_decompress_8
|
Loading…
Reference in a new issue