Self-extractor for on-disk firmware image: UCL decompressor in SH1 assembler - less than half the size of the compiled C function, and ~45% faster.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18904 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2008-10-28 21:07:53 +00:00
parent afd2f681d1
commit 2c52dee83f
3 changed files with 164 additions and 72 deletions

View file

@ -14,7 +14,8 @@ PRINTS=$(SILENT)$(call info,$(1))
LDS := link.lds
LINKFILE = $(OBJDIR)/linkage.lds
OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o $(OBJDIR)/startup.o
OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o \
$(OBJDIR)/sh_nrv2e_d8.o $(OBJDIR)/startup.o
CFLAGS = $(GCCOPTS)
all: $(OBJDIR)/compressed.bin
@ -25,9 +26,6 @@ $(OBJDIR)/compressed.bin : $(OBJDIR)/compressed.elf
# Link all objects into the ELF image with the generated linker script
# $(LINKFILE); a link map is written to $(OBJDIR)/compressed.map.
$(OBJDIR)/compressed.elf : $(OBJS) $(LINKFILE)
$(call PRINTS,LD $(@F))$(CC) $(GCCOPTS) -Os -nostdlib -o $@ $(OBJS) -T$(LINKFILE) -Wl,-Map,$(OBJDIR)/compressed.map
$(LDS): $(OBJS)
# Produce $(LINKFILE) by piping the linker script template $(LDS) through the
# C preprocessor (-E -P) with MEMORYSIZE and the target defines applied.
$(LINKFILE): $(LDS)
$(call PRINTS,Build LDS file)cat $< | $(CC) -DMEMORYSIZE=$(MEMORYSIZE) $(INCLUDES) $(TARGET) $(DEFINES) -E -P $(ROMBUILD) - >$@
@ -39,11 +37,15 @@ $(OBJDIR)/startup.o : startup.S
$(SILENT)mkdir -p $(dir $@)
$(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@
# Assemble the NRV2E decompressor (sh_nrv2e_d8.S) via the compiler driver,
# creating the object directory first if necessary.
$(OBJDIR)/sh_nrv2e_d8.o : sh_nrv2e_d8.S
$(SILENT)mkdir -p $(dir $@)
$(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@
# Compile the generated C source $(OBJDIR)/uclimage.c into an object file.
$(OBJDIR)/uclimage.o : $(OBJDIR)/uclimage.c
$(SILENT)mkdir -p $(dir $@)
$(call PRINTS,CC $(<F))$(CC) $(CFLAGS) -c $< -o $@
$(OBJDIR)/uclimage.c : $(FLASHFILE) $(TOOLSDIR)/ucl2src.pl
$(SILENT)mkdir -p $(dir $@)
$(call PRINTS,UCL2SRC)perl -s $(TOOLSDIR)/ucl2src.pl -p=$(OBJDIR)/uclimage $<
$(call PRINTS,UCL2SRC $(<F))perl -s $(TOOLSDIR)/ucl2src.pl -p=$(OBJDIR)/uclimage $<

View file

@ -36,8 +36,8 @@ extern char loadaddress[], dramend[];
extern void start(void);
void main(void) ICODE_ATTR;
static int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst,
unsigned long *dst_len) ICODE_ATTR;
int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst,
unsigned long *dst_len) ICODE_ATTR;
/* Vector table */
void (*vbr[]) (void) __attribute__ ((section (".vectors"))) =
@ -50,71 +50,6 @@ void (*vbr[]) (void) __attribute__ ((section (".vectors"))) =
/** All subsequent functions are executed from IRAM **/
/* Thinned out version of the UCL 2e decompression sourcecode
 * Original (C) Markus F.X.J Oberhumer under GNU GPL license */

/* Yields the next bit of the compressed stream (0 or 1).
 * 'bb' is the bit buffer: while any of its low 7 bits is set it is simply
 * doubled; otherwise a fresh byte is taken from src[ilen++], doubled, and a
 * marker bit is appended. The bit delivered is bit 8 of the updated buffer,
 * so each input byte is consumed MSB first.
 * Kept as a macro so that no separate out-of-line helper is needed. */
#define GETBIT(bb, src, ilen) \
    (((bb = bb & 0x7f ? bb*2 : ((unsigned)src[ilen++]*2+1)) >> 8) & 1)

/* Decompress the NRV2E stream at 'src' into 'dst'.
 *
 * src     - compressed input; read until the end-of-stream marker is found
 * dst     - output buffer; no bounds checking is performed on it
 * dst_len - receives the number of bytes written to 'dst'
 *
 * Returns the number of input bytes consumed.
 */
static int ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst,
                                  unsigned long *dst_len)
{
    unsigned long bb = 0;       /* bit buffer, empty at start */
    unsigned ilen = 0;          /* read index into src */
    unsigned olen = 0;          /* write index into dst */
    unsigned last_m_off = 1;    /* most recently used match offset */

    while (1)
    {
        unsigned m_off = 1;
        unsigned m_len;
        const unsigned char *m_pos;

        /* Each set flag bit announces one literal byte. */
        while (GETBIT(bb, src, ilen))
        {
            dst[olen] = src[ilen];
            olen++;
            ilen++;
        }

        /* Variable-length prefix of the match offset. */
        while (1)
        {
            m_off = 2 * m_off + GETBIT(bb, src, ilen);
            if (GETBIT(bb, src, ilen))
                break;
            m_off = 2 * (m_off - 1) + GETBIT(bb, src, ilen);
        }

        if (m_off != 2)
        {
            /* New offset: the prefix supplies the high part, one whole
             * input byte the low 8 bits. */
            m_off = 256 * (m_off - 3) + src[ilen++];
            if (m_off == 0xffffffff)
                break;                      /* end-of-stream marker */
            m_len = ~m_off & 1;             /* inverted LSB is a length flag */
            m_off = (m_off >> 1) + 1;
            last_m_off = m_off;
        }
        else
        {
            /* Prefix value 2 means: reuse the previous offset. */
            m_off = last_m_off;
            m_len = GETBIT(bb, src, ilen);
        }

        /* Decode the match length. */
        if (m_len != 0)
        {
            m_len = GETBIT(bb, src, ilen) + 1;
        }
        else if (GETBIT(bb, src, ilen))
        {
            m_len = GETBIT(bb, src, ilen) + 3;
        }
        else
        {
            m_len = 1;
            do
            {
                m_len = 2 * m_len + GETBIT(bb, src, ilen);
            }
            while (!GETBIT(bb, src, ilen));
            m_len += 3;
        }

        /* Matches further back than 0x500 are one byte longer. */
        if (m_off > 0x500)
            m_len++;

        /* Copy m_len + 1 bytes, one at a time, from m_off bytes back in the
         * output already produced. */
        m_pos = dst + olen - m_off;
        dst[olen++] = *m_pos++;
        do
        {
            dst[olen++] = *m_pos++;
        }
        while (--m_len != 0);
    }

    *dst_len = olen;
    return ilen;
}
/* sizeof(image) rounded up to the next multiple of 4 */
#define ALIGNED_IMG_SIZE ((sizeof(image) + 3) & ~3)
/* This will never return */
void main(void)

View file

@ -0,0 +1,155 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2008 by Jens Arnold
*
* based on arm_nrv2e_d8.S -- ARM decompressor for NRV2E
* Copyright (C) 1996-2008 Markus Franz Xaver Johannes Oberhumer
* Copyright (C) 1996-2008 Laszlo Molnar
* Copyright (C) 2000-2008 John F. Reiser
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* Register allocation. r4, r5 and r6 hold the three arguments on entry
 * (src, dst, dst_len); r6 is reused as len/cnt once the routine has pushed
 * its original value on the stack. */
#define src r4
#define dst r5
#define len r6 /* overlaps 'cnt' */
#define cnt r6 /* overlaps 'len' while reading an offset */
#define tmp r7
#define off r0 /* must be r0 because of indexed addressing */
#define bits r1
#define bitmask r2
#define wrnk r3 /* -0x500 -M2_MAX_OFFSET before "wrinkle" */
/* GETBIT leaves the next bit of the compressed stream in the T flag.
 * 'bits' holds the unread bits left-aligned, followed by a single marker
 * bit. 'tst' against bitmask (set to 0x7fffffff by the routine) sets T when
 * only the marker is left in the MSB. If T is clear, 'bf' skips the refill
 * and 'shll' shifts the next bit out into T. If T is set, 'bsr' calls
 * get1_n2e to load a new byte; the 'shll' at label 1 then sits in the bsr
 * delay slot and shifts the old marker out into T for get1_n2e to reuse.
 * bsr overwrites pr, so pr must have been saved by the user of this macro. */
#define GETBIT \
tst bits, bitmask; \
bf 1f; \
bsr get1_n2e; \
1: \
shll bits /* using the delay slot on purpose */
/* getnextb(reg): shift the next stream bit into the bottom of reg.
 * jnextb0 / jnextb1 <label>: fetch the next stream bit and branch to <label>
 * if it is 0 / 1 respectively. */
#define getnextb(reg) GETBIT; rotcl reg
#define jnextb0 GETBIT; bf
#define jnextb1 GETBIT; bt
/* The routine is emitted into the .icode section. */
    .section    .icode,"ax",@progbits
    .align      2
    .global     _ucl_nrv2e_decompress_8
    .type       _ucl_nrv2e_decompress_8,@function

/* src_len = ucl_nrv2e_decompress_8(const unsigned char *src,
 *                                  unsigned char *dst,
 *                                  unsigned long *dst_len)
 *
 * Decompresses the NRV2E stream at src into dst.
 *
 * On entry: r4 = src, r5 = dst, r6 = dst_len. All three are pushed so the
 *           start addresses are still available at the end of the stream.
 * On exit:  *dst_len = number of bytes written (dst - original dst),
 *           r0       = number of bytes consumed (src - original src).
 *
 * No bounds checking is done on either buffer. The match offset is kept in
 * negated form in 'off' so it can be used directly as the index in
 * @(r0,dst) addressing.
 */
_ucl_nrv2e_decompress_8:
    sts.l   pr, @-r15           /* pr is overwritten by the bsr in GETBIT */
    mov     #-1, off            ! off = -1 initial condition
    mov.l   r6, @-r15           /* push dst_len pointer, r6 is reused below */
    mov     #-5, wrnk
    mov.l   r5, @-r15           /* push original dst */
    shll8   wrnk                ! nrv2e -M2_MAX_OFFSET
    mov.l   r4, @-r15           /* push original src */
    mov     #-1, bitmask
    shlr    bitmask             ! 0x7fffffff for testing before shifting
    bra     top_n2e
    not     bitmask, bits       ! refill next time (MSB must be set)

/* End of stream: store the output length, return the input length. */
eof_n2e:
    mov.l   @r15+, r0           ! r0 = orig_src
    mov.l   @r15+, r1           ! r1 = orig_dst
    sub     r0, src
    mov.l   @r15+, r2           ! r2 = plen_dst
    sub     r1, dst
    mov.l   dst, @r2
    lds.l   @r15+, pr
    rts
    mov     src, r0             /* delay slot: return value */

/* Refill helper called from GETBIT. Loads the next input byte, rotates the
 * incoming T bit in below it as the new marker and moves the result to the
 * top of the register. On return T holds the first bit of the new byte:
 * it is set by rotcl, and the two following shifts are relied upon to
 * leave it untouched. */
    .align  2
get1_n2e:                       ! in: T bit set
    mov.b   @src+, bits         ! SH1 sign-extends on load
    rotcl   bits                ! LSB = T, T = MSB
    shll16  bits
    rts
    shll8   bits                /* delay slot */

/* Copy one literal byte from the input to the output. */
    .align  2
lit_n2e:
    mov.b   @src, tmp
    add     #1, src             ! Need to fill the pipeline latency anyway
    mov.b   tmp, @dst
    add     #1, dst

/* Main loop: a 1 bit announces a literal, a 0 bit starts a match. */
top_n2e:
    jnextb1 lit_n2e
    bra     getoff_n2e
    mov     #1, cnt             /* delay slot: offset prefix starts at 1 */

/* Read the variable-length offset prefix into cnt. */
off_n2e:
    add     #-1, cnt
    getnextb(cnt)
getoff_n2e:
    getnextb(cnt)
    jnextb0 off_n2e

    mov     cnt, tmp
    mov     #0, len             ! cnt and len share a reg!
    add     #-3, tmp
    cmp/pz  tmp
    bf      offprev_n2e         ! cnt was 2

    /* New offset: (prefix - 3) supplies the high part, one input byte the
     * low 8 bits. The complement is zero only for the end marker. */
    mov.b   @src+, off          ! low 7+1 bits
    shll8   tmp
    extu.b  off, off
    or      tmp, off
    not     off, off            ! off = ~off
    tst     off, off
    bt      eof_n2e
    shar    off                 /* bit shifted out into T is a length flag */
    bt      lenlast_n2e
    bra     lenmore_n2e
    mov     #1, len             /* delay slot */

/* Prefix value 2: keep the previous offset, the next bit is the length flag.
 * bt has no delay slot, so len is set to 1 only on the fall-through path. */
offprev_n2e:
    jnextb1 lenlast_n2e
    mov     #1, len
lenmore_n2e:
    jnextb1 lenlast_n2e
len_n2e:
    getnextb(len)
    jnextb0 len_n2e
    bra     gotlen_n2e
    add     #6-2, len           /* delay slot */

lenlast_n2e:
    getnextb(len)               ! 0,1,2,3
    add     #2, len

gotlen_n2e:
    cmp/gt  off, wrnk
    movt    tmp                 ! too far away, so minimum match length is 3
    add     tmp, len

/* Copy len bytes, one at a time, from dst + off to dst. */
copy_n2e:
    add     #-1, len
    mov.b   @(off,dst), tmp
    tst     len, len
    mov.b   tmp, @dst
    add     #1, dst
    bf      copy_n2e

    bra     top_n2e
    nop

/* Uses the same underscore-prefixed symbol as the label, .global and .type
 * above, so that the size is attached to the symbol that is actually
 * defined. */
    .size   _ucl_nrv2e_decompress_8, .-_ucl_nrv2e_decompress_8