From 2c52dee83f5c796fe471e6fab15dea17a2f508ff Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Tue, 28 Oct 2008 21:07:53 +0000 Subject: [PATCH] Self-extractor for on-disk firmware image: UCL decompressor in SH1 assembler - less than half the size of the compiled C function, and ~45% faster. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18904 a1c6a512-1295-4272-9138-f99709370657 --- firmware/decompressor/Makefile | 12 ++- firmware/decompressor/decompressor.c | 69 +----------- firmware/decompressor/sh_nrv2e_d8.S | 155 +++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 72 deletions(-) create mode 100644 firmware/decompressor/sh_nrv2e_d8.S diff --git a/firmware/decompressor/Makefile b/firmware/decompressor/Makefile index 46d7afe1b0..33b6affc6d 100644 --- a/firmware/decompressor/Makefile +++ b/firmware/decompressor/Makefile @@ -14,7 +14,8 @@ PRINTS=$(SILENT)$(call info,$(1)) LDS := link.lds LINKFILE = $(OBJDIR)/linkage.lds -OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o $(OBJDIR)/startup.o +OBJS := $(OBJDIR)/decompressor.o $(OBJDIR)/uclimage.o \ + $(OBJDIR)/sh_nrv2e_d8.o $(OBJDIR)/startup.o CFLAGS = $(GCCOPTS) all: $(OBJDIR)/compressed.bin @@ -25,9 +26,6 @@ $(OBJDIR)/compressed.bin : $(OBJDIR)/compressed.elf $(OBJDIR)/compressed.elf : $(OBJS) $(LINKFILE) $(call PRINTS,LD $(@F))$(CC) $(GCCOPTS) -Os -nostdlib -o $@ $(OBJS) -T$(LINKFILE) -Wl,-Map,$(OBJDIR)/compressed.map -$(LDS): $(OBJS) - - $(LINKFILE): $(LDS) $(call PRINTS,Build LDS file)cat $< | $(CC) -DMEMORYSIZE=$(MEMORYSIZE) $(INCLUDES) $(TARGET) $(DEFINES) -E -P $(ROMBUILD) - >$@ @@ -39,11 +37,15 @@ $(OBJDIR)/startup.o : startup.S $(SILENT)mkdir -p $(dir $@) $(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@ +$(OBJDIR)/sh_nrv2e_d8.o : sh_nrv2e_d8.S + $(SILENT)mkdir -p $(dir $@) + $(call PRINTS,AS $<)$(CC) $(CFLAGS) -c $< -o $@ + $(OBJDIR)/uclimage.o : $(OBJDIR)/uclimage.c $(SILENT)mkdir -p $(dir $@) $(call PRINTS,CC $(> 8) & 1) - -static int ucl_nrv2e_decompress_8(const unsigned char *src, 
unsigned char *dst,
-                                  unsigned long *dst_len)
-{
-    unsigned long bb = 0;
-    unsigned ilen = 0, olen = 0, last_m_off = 1;
-
-    for (;;)
-    {
-        unsigned m_off, m_len;
-
-        while (GETBIT(bb,src,ilen))
-            dst[olen++] = src[ilen++];
-
-        m_off = 1;
-        for (;;)
-        {
-            m_off = m_off*2 + GETBIT(bb,src,ilen);
-            if (GETBIT(bb,src,ilen))
-                break;
-            m_off = (m_off-1)*2 + GETBIT(bb,src,ilen);
-        }
-        if (m_off == 2)
-        {
-            m_off = last_m_off;
-            m_len = GETBIT(bb,src,ilen);
-        }
-        else
-        {
-            m_off = (m_off-3)*256 + src[ilen++];
-            if (m_off == 0xffffffff)
-                break;
-            m_len = (m_off ^ 0xffffffff) & 1;
-            m_off >>= 1;
-            last_m_off = ++m_off;
-        }
-        if (m_len)
-            m_len = 1 + GETBIT(bb,src,ilen);
-        else if (GETBIT(bb,src,ilen))
-            m_len = 3 + GETBIT(bb,src,ilen);
-        else
-        {
-            m_len++;
-            do {
-                m_len = m_len*2 + GETBIT(bb,src,ilen);
-            } while (!GETBIT(bb,src,ilen));
-            m_len += 3;
-        }
-        m_len += (m_off > 0x500);
-        {
-            const unsigned char *m_pos;
-            m_pos = dst + olen - m_off;
-            dst[olen++] = *m_pos++;
-            do dst[olen++] = *m_pos++; while (--m_len > 0);
-        }
-    }
-    *dst_len = olen;
-
-    return ilen;
-}
-
 #define ALIGNED_IMG_SIZE ((sizeof(image) + 3) & ~3)
 /* This will never return */
 void main(void)
diff --git a/firmware/decompressor/sh_nrv2e_d8.S b/firmware/decompressor/sh_nrv2e_d8.S
new file mode 100644
index 0000000000..c002911c0c
--- /dev/null
+++ b/firmware/decompressor/sh_nrv2e_d8.S
@@ -0,0 +1,155 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Jens Arnold
+ *
+ * based on arm_nrv2e_d8.S -- ARM decompressor for NRV2E
+ * Copyright (C) 1996-2008 Markus Franz Xaver Johannes Oberhumer
+ * Copyright (C) 1996-2008 Laszlo Molnar
+ * Copyright (C) 2000-2008 John F. Reiser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#define src     r4  /* arg 1 on entry; read pointer into the compressed data */
+#define dst     r5  /* arg 2 on entry; write pointer into the output buffer */
+#define len     r6  /* match length; overlaps 'cnt' */
+#define cnt     r6  /* overlaps 'len' while reading an offset */
+#define tmp     r7  /* scratch */
+
+#define off     r0  /* negated match offset; must be r0 because of indexed addressing */
+#define bits    r1  /* bit buffer, consumed from the MSB; a 1 bit below the data marks its end */
+#define bitmask r2  /* 0x7fffffff, i.e. every bit below the MSB */
+#define wrnk    r3  /* -0x500 -M2_MAX_OFFSET before "wrinkle" */
+
+/* GETBIT: T = next bit of the stream; calls get1_n2e when only the marker bit is left */
+#define GETBIT \
+    tst     bits, bitmask; \
+    bf      1f; \
+    bsr     get1_n2e; \
+1:  \
+    shll    bits  /* using the delay slot on purpose */
+
+#define getnextb(reg) GETBIT; rotcl reg
+#define jnextb0       GETBIT; bf
+#define jnextb1       GETBIT; bt
+
+    .section    .icode,"ax",@progbits
+    .align      2
+    .global     _ucl_nrv2e_decompress_8
+    .type       _ucl_nrv2e_decompress_8,@function
+
+/* src_len = ucl_nrv2e_decompress_8(const unsigned char *src, unsigned char *dst,
+ *                                  unsigned long *dst_len)
+ * In:  r4 = src, r5 = dst, r6 = dst_len.  Out: r0 = bytes read from src,
+ * *dst_len = bytes written to dst.  Uses r0-r7 and T; no bounds checking. */
+
+_ucl_nrv2e_decompress_8:
+    sts.l   pr, @-r15           ! pr is overwritten by the bsr in the bit reader
+    mov     #-1, off            ! off = -1 initial condition
+    mov.l   r6, @-r15           ! keep the dst_len pointer, r6 gets reused
+    mov     #-5, wrnk
+    mov.l   r5, @-r15           ! keep the original dst
+    shll8   wrnk                ! nrv2e -M2_MAX_OFFSET
+    mov.l   r4, @-r15           ! keep the original src
+    mov     #-1, bitmask
+    shlr    bitmask             ! 0x7fffffff for testing before shifting
+    bra     top_n2e
+    not     bitmask, bits       ! refill next time (MSB must be set)
+
+eof_n2e:
+    mov.l   @r15+, r0           ! r0 = orig_src
+    mov.l   @r15+, r1           ! r1 = orig_dst
+    sub     r0, src             ! src = number of input bytes consumed
+    mov.l   @r15+, r2           ! r2 = plen_dst
+    sub     r1, dst             ! dst = number of output bytes produced
+    mov.l   dst, @r2            ! store the output length through plen_dst
+    lds.l   @r15+, pr
+    rts
+    mov     src, r0             ! delay slot: return the input length
+
+    .align  2
+get1_n2e:                       ! in: T bit set
+    mov.b   @src+, bits         ! SH1 sign-extends on load
+    rotcl   bits                ! LSB = T, T = MSB
+    shll16  bits
+    rts
+    shll8   bits                ! delay slot: 24 bit shift in total, T is kept
+
+    .align  2
+lit_n2e:
+    mov.b   @src, tmp           ! copy one literal byte
+    add     #1, src             ! Need to fill the pipeline latency anyway
+    mov.b   tmp, @dst
+    add     #1, dst
+top_n2e:
+    jnextb1 lit_n2e             ! a 1 bit announces a literal
+    bra     getoff_n2e
+    mov     #1, cnt             ! delay slot: offset prefix starts at 1
+
+off_n2e:
+    add     #-1, cnt
+    getnextb(cnt)
+getoff_n2e:
+    getnextb(cnt)
+    jnextb0 off_n2e             ! a 0 bit means the prefix continues
+
+    mov     cnt, tmp
+    mov     #0, len             ! cnt and len share a reg!
+    add     #-3, tmp
+    cmp/pz  tmp
+    bf      offprev_n2e         ! cnt was 2
+    mov.b   @src+, off          ! low 7+1 bits
+    shll8   tmp
+    extu.b  off, off
+    or      tmp, off            ! off = (prefix - 3) * 256 + byte just read
+    not     off, off            ! off = ~off
+    tst     off, off
+    bt      eof_n2e             ! ~off == 0 is the end marker
+    shar    off                 ! off = negated match offset, T = shifted out LSB
+    bt      lenlast_n2e
+    bra     lenmore_n2e
+    mov     #1, len
+
+offprev_n2e:                    ! reuse the previous offset, off is left untouched
+    jnextb1 lenlast_n2e
+    mov     #1, len
+lenmore_n2e:
+    jnextb1 lenlast_n2e
+len_n2e:
+    getnextb(len)
+    jnextb0 len_n2e
+    bra     gotlen_n2e
+    add     #6-2, len
+
+lenlast_n2e:
+    getnextb(len)               ! 0,1,2,3
+    add     #2, len
+gotlen_n2e:
+    cmp/gt  off, wrnk           ! T = wrnk > off (signed compare)
+    movt    tmp                 ! too far away, so minimum match length is 3
+    add     tmp, len
+copy_n2e:
+    add     #-1, len
+    mov.b   @(off,dst), tmp     ! off is negative, so this reads behind dst
+    tst     len, len
+    mov.b   tmp, @dst
+    add     #1, dst
+    bf      copy_n2e            ! loop until len has reached 0
+    bra     top_n2e
+    nop
+
+    .size   _ucl_nrv2e_decompress_8, .-_ucl_nrv2e_decompress_8