rockbox/rbutil/rbutilqt/mspack/chmd.c
Amaury Pouly 289acf3333 Fix libmspack to compile with rbutil
The build system used by rbutil is not very robust: all the files
are eventually compiled to the same directory so we cannot have
two files with the same name (system.c would conflicts with
system.cpp) so rename one file to avoid this. Also change all
include directives to local ones because we don't have to expose
the entire mspack source to inclusion.

Change-Id: I3fe0638d69fdc30566eb9425abfe33c807678b28
Reviewed-on: http://gerrit.rockbox.org/417
Reviewed-by: Dominik Riebeling <Dominik.Riebeling@gmail.com>
2013-11-04 22:15:00 +01:00

1346 lines
41 KiB
C

/* This file is part of libmspack.
* (C) 2003-2011 Stuart Caie.
*
* libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
*
* For further details, see the file COPYING.LIB distributed with libmspack
*/
/* CHM decompression implementation */
#include "system-mspack.h"
#include "chm.h"
/* prototypes */
/* entry points installed in the decompressor's function table by
 * mspack_create_chm_decompressor(); chmd_real_open() is the shared
 * implementation behind chmd_open() and chmd_fast_open() */
static struct mschmd_header * chmd_open(
struct mschm_decompressor *base, const char *filename);
static struct mschmd_header * chmd_fast_open(
struct mschm_decompressor *base, const char *filename);
static struct mschmd_header *chmd_real_open(
struct mschm_decompressor *base, const char *filename, int entire);
static void chmd_close(
struct mschm_decompressor *base, struct mschmd_header *chm);
/* header parsing */
static int chmd_read_headers(
struct mspack_system *sys, struct mspack_file *fh,
struct mschmd_header *chm, int entire);
/* on-disk directory lookup: chmd_fast_find() reads chunks with read_chunk()
 * and searches them with search_chunk(), which orders names with compare() */
static int chmd_fast_find(
struct mschm_decompressor *base, struct mschmd_header *chm,
const char *filename, struct mschmd_file *f_ptr, int f_size);
static unsigned char *read_chunk(
struct mschm_decompressor_p *self, struct mschmd_header *chm,
struct mspack_file *fh, unsigned int chunk);
static int search_chunk(
struct mschmd_header *chm, const unsigned char *chunk, const char *filename,
const unsigned char **result, const unsigned char **result_end);
static inline int compare(
const char *s1, const char *s2, int l1, int l2);
/* extraction: chmd_sys_write() is the write callback handed to the LZX
 * decompressor, chmd_init_decomp() sets that decompressor up */
static int chmd_extract(
struct mschm_decompressor *base, struct mschmd_file *file,
const char *filename);
static int chmd_sys_write(
struct mspack_file *file, void *buffer, int bytes);
static int chmd_init_decomp(
struct mschm_decompressor_p *self, struct mschmd_file *file);
/* helpers for locating and reading the MSCompressed system files */
static int read_reset_table(
struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec,
int entry, off_t *length_ptr, off_t *offset_ptr);
static int read_spaninfo(
struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec,
off_t *length_ptr);
static int find_sys_file(
struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec,
struct mschmd_file **f_ptr, const char *name);
static unsigned char *read_sys_file(
struct mschm_decompressor_p *self, struct mschmd_file *file);
/* last-error accessor installed as base.last_error */
static int chmd_error(
struct mschm_decompressor *base);
/* reads a 64-bit offset out of a header buffer; callers treat a non-zero
 * return as failure */
static int read_off64(
off_t *var, unsigned char *mem, struct mspack_system *sys,
struct mspack_file *fh);
/* filenames of the system files used for decompression.
* Content and ControlData are essential.
* ResetTable is preferred, but SpanInfo can be used if not available
*/
/* the compressed data stream itself */
static const char *content_name = "::DataSpace/Storage/MSCompressed/Content";
/* LZX parameters (signature, version, reset interval, window size) */
static const char *control_name = "::DataSpace/Storage/MSCompressed/ControlData";
/* fallback source for the uncompressed stream length */
static const char *spaninfo_name = "::DataSpace/Storage/MSCompressed/SpanInfo";
/* table of reset points; the literal is split only to keep the line short */
static const char *rtable_name = "::DataSpace/Storage/MSCompressed/Transform/"
"{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable";
/***************************************
* MSPACK_CREATE_CHM_DECOMPRESSOR
***************************************
* constructor
*/
struct mschm_decompressor *
mspack_create_chm_decompressor(struct mspack_system *sys)
{
struct mschm_decompressor_p *self = NULL;
if (!sys) sys = mspack_default_system;
if (!mspack_valid_system(sys)) return NULL;
if ((self = (struct mschm_decompressor_p *) sys->alloc(sys, sizeof(struct mschm_decompressor_p)))) {
self->base.open = &chmd_open;
self->base.close = &chmd_close;
self->base.extract = &chmd_extract;
self->base.last_error = &chmd_error;
self->base.fast_open = &chmd_fast_open;
self->base.fast_find = &chmd_fast_find;
self->system = sys;
self->error = MSPACK_ERR_OK;
self->d = NULL;
}
return (struct mschm_decompressor *) self;
}
/***************************************
* MSPACK_DESTROY_CHM_DECOMPRESSOR
***************************************
* destructor
*/
/* Releases a decompressor created by mspack_create_chm_decompressor(),
 * together with any decompression state it still holds (open input file,
 * LZX state). A NULL argument is ignored.
 */
void mspack_destroy_chm_decompressor(struct mschm_decompressor *base) {
    struct mschm_decompressor_p *chmd = (struct mschm_decompressor_p *) base;
    struct mspack_system *sys;

    if (!chmd) return;
    sys = chmd->system;

    if (chmd->d) {
        if (chmd->d->infh)  sys->close(chmd->d->infh);
        if (chmd->d->state) lzxd_free(chmd->d->state);
        sys->free(chmd->d);
    }
    sys->free(chmd);
}
/***************************************
* CHMD_OPEN
***************************************
* opens a file and tries to read it as a CHM file.
* Calls chmd_real_open() with entire=1.
*/
/* Full open: asks chmd_real_open() to read every file entry. */
static struct mschmd_header *chmd_open(struct mschm_decompressor *base,
                                       const char *filename)
{
    const int read_entire = 1;
    return chmd_real_open(base, filename, read_entire);
}
/***************************************
* CHMD_FAST_OPEN
***************************************
* opens a file and tries to read it as a CHM file, but does not read
* the file headers. Calls chmd_real_open() with entire=0
*/
/* Quick open: asks chmd_real_open() to stop before the file entries. */
static struct mschmd_header *chmd_fast_open(struct mschm_decompressor *base,
                                            const char *filename)
{
    const int read_entire = 0;
    return chmd_real_open(base, filename, read_entire);
}
/***************************************
* CHMD_REAL_OPEN
***************************************
* the real implementation of chmd_open() and chmd_fast_open(). It simply
* passes the "entire" parameter to chmd_read_headers(), which will then
* either read all headers, or a bare minimum.
*/
static struct mschmd_header *chmd_real_open(struct mschm_decompressor *base,
const char *filename, int entire)
{
struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
struct mschmd_header *chm = NULL;
struct mspack_system *sys;
struct mspack_file *fh;
int error;
if (!base) return NULL;
sys = self->system;
if ((fh = sys->open(sys, filename, MSPACK_SYS_OPEN_READ))) {
if ((chm = (struct mschmd_header *) sys->alloc(sys, sizeof(struct mschmd_header)))) {
chm->filename = filename;
error = chmd_read_headers(sys, fh, chm, entire);
if (error) {
/* if the error is DATAFORMAT, and there are some results, return
* partial results with a warning, rather than nothing */
if (error == MSPACK_ERR_DATAFORMAT && (chm->files || chm->sysfiles)) {
sys->message(fh, "WARNING; contents are corrupt");
error = MSPACK_ERR_OK;
}
else {
chmd_close(base, chm);
chm = NULL;
}
}
self->error = error;
}
else {
self->error = MSPACK_ERR_NOMEMORY;
}
sys->close(fh);
}
else {
self->error = MSPACK_ERR_OPEN;
}
return chm;
}
/***************************************
* CHMD_CLOSE
***************************************
* frees all memory associated with a given mschmd_header
*/
/* Frees a header returned by one of the open functions, along with its
 * file lists, its chunk cache and - if this decompressor is currently set
 * up to extract from that header - the decompression state.
 *
 * base - the decompressor that opened the header
 * chm  - the header to free
 *
 * A NULL base or a NULL chm makes the call a no-op (previously a NULL chm
 * was dereferenced). Otherwise the decompressor's error field is reset to
 * MSPACK_ERR_OK.
 */
static void chmd_close(struct mschm_decompressor *base,
                       struct mschmd_header *chm)
{
    struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
    struct mschmd_file *fi, *nfi;
    struct mspack_system *sys;
    unsigned int i;

    if (!base || !chm) return;
    sys = self->system;
    self->error = MSPACK_ERR_OK;

    /* free files; each entry is a single allocation that also holds its name */
    for (fi = chm->files; fi; fi = nfi) {
        nfi = fi->next;
        sys->free(fi);
    }
    for (fi = chm->sysfiles; fi; fi = nfi) {
        nfi = fi->next;
        sys->free(fi);
    }

    /* if this CHM was being decompressed, free decompression state */
    if (self->d && (self->d->chm == chm)) {
        if (self->d->infh) sys->close(self->d->infh);
        if (self->d->state) lzxd_free(self->d->state);
        sys->free(self->d);
        self->d = NULL;
    }

    /* if this CHM had a chunk cache, free it and contents */
    if (chm->chunk_cache) {
        for (i = 0; i < chm->num_chunks; i++) sys->free(chm->chunk_cache[i]);
        sys->free(chm->chunk_cache);
    }

    sys->free(chm);
}
/***************************************
* CHMD_READ_HEADERS
***************************************
* reads the basic CHM file headers. If the "entire" parameter is
* non-zero, all file entries will also be read. fills out a pre-existing
* mschmd_header structure, allocates memory for files as necessary
*/
/* The GUIDs found in CHM headers */
/* Both GUIDs are stored back to back so that chmd_read_headers() can check
 * them against the file header with a single 32-byte comparison. */
static const unsigned char guids[32] = {
/* {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC} */
0x10, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC,
/* {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC} */
0x11, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC
};
/* reads an encoded integer into a variable; 7 bits of data per byte,
 * the high bit is used to indicate that there is another byte.
 *
 * Relies on three names in the expanding function: a read cursor "p", a
 * limit pointer "end" and a label "chunk_end". The cursor is advanced past
 * the integer. "end" is exclusive: the byte it points at belongs to whatever
 * follows the entries, so reaching it jumps to chunk_end instead of reading
 * it (the check used to be "p > end", which consumed that byte as data). */
#define READ_ENCINT(var) do {                   \
    (var) = 0;                                  \
    do {                                        \
        if (p >= end) goto chunk_end;           \
        (var) = ((var) << 7) | (*p & 0x7F);     \
    } while (*p++ & 0x80);                      \
} while (0)
/* Parses the CHM headers read from fh into chm.
 *
 * sys    - I/O and memory callbacks used for every read, seek and allocation
 * fh     - open handle on the CHM file; reading starts at its current position
 * chm    - header to fill in; its list, cache and section pointers are reset
 *          before anything is read
 * entire - zero: return once header section 1 has been read;
 *          non-zero: also walk the PMGL chunks and build the file lists
 *
 * Returns MSPACK_ERR_OK or an MSPACK_ERR_* code. MSPACK_ERR_DATAFORMAT may be
 * returned after some entries have already been linked into chm.
 *
 * Hardening over the previous revision:
 *  - an entry's name length is checked against the space left in the chunk
 *    before the name is skipped, copied or inspected;
 *  - system files are recognised by exact name and length, so short names
 *    are never read beyond their end;
 *  - chunk offsets are computed in off_t rather than 32-bit unsigned;
 *  - a FirstPMGL beyond LastPMGL is rejected instead of wrapping the count.
 */
static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
                             struct mschmd_header *chm, int entire)
{
    unsigned int section, name_len, x, errors, num_chunks;
    unsigned char buf[0x54], *chunk = NULL, *name, *p, *end;
    struct mschmd_file *fi, *link = NULL;
    off_t offset, length;
    int num_entries;

    /* initialise pointers */
    chm->files         = NULL;
    chm->sysfiles      = NULL;
    chm->chunk_cache   = NULL;
    chm->sec0.base.chm = chm;
    chm->sec0.base.id  = 0;
    chm->sec1.base.chm = chm;
    chm->sec1.base.id  = 1;
    chm->sec1.content  = NULL;
    chm->sec1.control  = NULL;
    chm->sec1.spaninfo = NULL;
    chm->sec1.rtable   = NULL;

    /* read the first header */
    if (sys->read(fh, &buf[0], chmhead_SIZEOF) != chmhead_SIZEOF) {
        return MSPACK_ERR_READ;
    }

    /* check ITSF signature */
    if (EndGetI32(&buf[chmhead_Signature]) != 0x46535449) {
        return MSPACK_ERR_SIGNATURE;
    }

    /* check both header GUIDs */
    if (mspack_memcmp(&buf[chmhead_GUID1], &guids[0], 32L) != 0) {
        D(("incorrect GUIDs"))
        return MSPACK_ERR_SIGNATURE;
    }

    chm->version   = EndGetI32(&buf[chmhead_Version]);
    chm->timestamp = EndGetM32(&buf[chmhead_Timestamp]);
    chm->language  = EndGetI32(&buf[chmhead_LanguageID]);
    if (chm->version > 3) {
        sys->message(fh, "WARNING; CHM version > 3");
    }

    /* read the header section table */
    if (sys->read(fh, &buf[0], chmhst3_SIZEOF) != chmhst3_SIZEOF) {
        return MSPACK_ERR_READ;
    }

    /* chmhst3_OffsetCS0 does not exist in version 1 or 2 CHM files.
     * The offset will be corrected later, once HS1 is read.
     */
    if (read_off64(&offset,           &buf[chmhst_OffsetHS0],  sys, fh) ||
        read_off64(&chm->dir_offset,  &buf[chmhst_OffsetHS1],  sys, fh) ||
        read_off64(&chm->sec0.offset, &buf[chmhst3_OffsetCS0], sys, fh))
    {
        return MSPACK_ERR_DATAFORMAT;
    }

    /* seek to header section 0 */
    if (sys->seek(fh, offset, MSPACK_SYS_SEEK_START)) {
        return MSPACK_ERR_SEEK;
    }

    /* read header section 0 */
    if (sys->read(fh, &buf[0], chmhs0_SIZEOF) != chmhs0_SIZEOF) {
        return MSPACK_ERR_READ;
    }
    if (read_off64(&chm->length, &buf[chmhs0_FileLen], sys, fh)) {
        return MSPACK_ERR_DATAFORMAT;
    }

    /* seek to header section 1 */
    if (sys->seek(fh, chm->dir_offset, MSPACK_SYS_SEEK_START)) {
        return MSPACK_ERR_SEEK;
    }

    /* read header section 1 */
    if (sys->read(fh, &buf[0], chmhs1_SIZEOF) != chmhs1_SIZEOF) {
        return MSPACK_ERR_READ;
    }

    /* from here on dir_offset is the position of the first chunk */
    chm->dir_offset = sys->tell(fh);
    chm->chunk_size = EndGetI32(&buf[chmhs1_ChunkSize]);
    chm->density    = EndGetI32(&buf[chmhs1_Density]);
    chm->depth      = EndGetI32(&buf[chmhs1_Depth]);
    chm->index_root = EndGetI32(&buf[chmhs1_IndexRoot]);
    chm->num_chunks = EndGetI32(&buf[chmhs1_NumChunks]);
    chm->first_pmgl = EndGetI32(&buf[chmhs1_FirstPMGL]);
    chm->last_pmgl  = EndGetI32(&buf[chmhs1_LastPMGL]);

    if (chm->version < 3) {
        /* versions before 3 don't have chmhst3_OffsetCS0; widen before
         * multiplying so the product cannot wrap in 32 bits */
        chm->sec0.offset = chm->dir_offset +
            ((off_t) chm->chunk_size * (off_t) chm->num_chunks);
    }

    /* ensure chunk size is large enough for signature and num_entries */
    if (chm->chunk_size < (pmgl_Entries + 2)) {
        return MSPACK_ERR_DATAFORMAT;
    }

    /* if we are doing a quick read, stop here! */
    if (!entire) {
        return MSPACK_ERR_OK;
    }

    /* an inverted PMGL range would wrap the chunk count below */
    x = chm->first_pmgl;
    if (x > chm->last_pmgl) {
        D(("first pmgl chunk is after last pmgl chunk"))
        return MSPACK_ERR_DATAFORMAT;
    }

    /* seek to the first PMGL chunk, and reduce the number of chunks to read */
    if (x != 0) {
        if (sys->seek(fh, (off_t) x * (off_t) chm->chunk_size, MSPACK_SYS_SEEK_CUR)) {
            return MSPACK_ERR_SEEK;
        }
    }
    num_chunks = chm->last_pmgl - x + 1;

    if (!(chunk = (unsigned char *) sys->alloc(sys, (size_t)chm->chunk_size))) {
        return MSPACK_ERR_NOMEMORY;
    }

    /* read and process all chunks from FirstPMGL to LastPMGL */
    errors = 0;
    while (num_chunks--) {
        /* read next chunk */
        if (sys->read(fh, chunk, (int)chm->chunk_size) != (int)chm->chunk_size) {
            sys->free(chunk);
            return MSPACK_ERR_READ;
        }

        /* process only directory (PMGL) chunks */
        if (EndGetI32(&chunk[pmgl_Signature]) != 0x4C474D50) continue;

        if (EndGetI32(&chunk[pmgl_QuickRefSize]) < 2) {
            sys->message(fh, "WARNING; PMGL quickref area is too small");
        }
        if (EndGetI32(&chunk[pmgl_QuickRefSize]) >
            ((int)chm->chunk_size - pmgl_Entries))
        {
            sys->message(fh, "WARNING; PMGL quickref area is too large");
        }

        /* entries run from pmgl_Entries up to the 16-bit entry count that
         * occupies the last two bytes of the chunk */
        p = &chunk[pmgl_Entries];
        end = &chunk[chm->chunk_size - 2];
        num_entries = EndGetI16(end);

        while (num_entries--) {
            READ_ENCINT(name_len);
            /* the name must lie wholly inside the entry area */
            if (p > end || name_len > (unsigned int) (end - p)) goto chunk_end;
            name = p; p += name_len;
            READ_ENCINT(section);
            READ_ENCINT(offset);
            READ_ENCINT(length);

            /* empty files and directory names are stored as a file entry at
             * offset 0 with length 0. We want to keep empty files, but not
             * directory names, which end with a "/" */
            if ((offset == 0) && (length == 0)) {
                if ((name_len > 0) && (name[name_len-1] == '/')) continue;
            }

            if (section > 1) {
                sys->message(fh, "invalid section number '%u'.", section);
                continue;
            }

            /* one allocation holds the entry followed by its name */
            if (!(fi = (struct mschmd_file *) sys->alloc(sys, sizeof(struct mschmd_file) + name_len + 1))) {
                sys->free(chunk);
                return MSPACK_ERR_NOMEMORY;
            }

            fi->next     = NULL;
            fi->filename = (char *) &fi[1];
            fi->section  = ((section == 0) ? (struct mschmd_section *) (&chm->sec0)
                                           : (struct mschmd_section *) (&chm->sec1));
            fi->offset   = offset;
            fi->length   = length;
            sys->copy(name, fi->filename, (size_t) name_len);
            fi->filename[name_len] = '\0';

            if ((name_len >= 2) && (name[0] == ':') && (name[1] == ':')) {
                /* system file: remember the ones needed for decompression.
                 * The lengths are those of the *_name strings. */
                if (name_len == 40 && mspack_memcmp(name, content_name, 40L) == 0) {
                    chm->sec1.content = fi;
                }
                else if (name_len == 44 && mspack_memcmp(name, control_name, 44L) == 0) {
                    chm->sec1.control = fi;
                }
                else if (name_len == 41 && mspack_memcmp(name, spaninfo_name, 41L) == 0) {
                    chm->sec1.spaninfo = fi;
                }
                else if (name_len == 105 && mspack_memcmp(name, rtable_name, 105L) == 0) {
                    chm->sec1.rtable = fi;
                }
                /* system files are pushed onto the front of their list */
                fi->next = chm->sysfiles;
                chm->sysfiles = fi;
            }
            else {
                /* normal file: appended, so on-disk order is kept */
                if (link) link->next = fi; else chm->files = fi;
                link = fi;
            }
        }

        /* this is reached either when num_entries runs out, or if
         * reading data from the chunk reached a premature end of chunk */
    chunk_end:
        if (num_entries >= 0) {
            D(("chunk ended before all entries could be read"))
            errors++;
        }
    }
    sys->free(chunk);
    return (errors > 0) ? MSPACK_ERR_DATAFORMAT : MSPACK_ERR_OK;
}
/***************************************
* CHMD_FAST_FIND
***************************************
* uses PMGI index chunks and quickref data to quickly locate a file
* directly from the on-disk index.
*
* TODO: protect against infinite loops in chunks (where pgml_NextChunk
* or a PGMI index entry point to an already visited chunk)
*/
static int chmd_fast_find(struct mschm_decompressor *base,
struct mschmd_header *chm, const char *filename,
struct mschmd_file *f_ptr, int f_size)
{
struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
struct mspack_system *sys;
struct mspack_file *fh;
const unsigned char *chunk, *p, *end;
int err = MSPACK_ERR_OK, result = -1;
unsigned int n, sec;
if (!self || !chm || !f_ptr || (f_size != sizeof(struct mschmd_file))) {
return MSPACK_ERR_ARGS;
}
sys = self->system;
/* clear the results structure */
memset(f_ptr, 0, f_size);
if (!(fh = sys->open(sys, chm->filename, MSPACK_SYS_OPEN_READ))) {
return MSPACK_ERR_OPEN;
}
/* go through PMGI chunk hierarchy to reach PMGL chunk */
if (chm->index_root < chm->num_chunks) {
n = chm->index_root;
for (;;) {
if (!(chunk = read_chunk(self, chm, fh, n))) {
sys->close(fh);
return self->error;
}
/* search PMGI/PMGL chunk. exit early if no entry found */
if ((result = search_chunk(chm, chunk, filename, &p, &end)) <= 0) {
break;
}
/* found result. loop around for next chunk if this is PMGI */
if (chunk[3] == 0x4C) break; else READ_ENCINT(n);
}
}
else {
/* PMGL chunks only, search from first_pmgl to last_pmgl */
for (n = chm->first_pmgl; n <= chm->last_pmgl;
n = EndGetI32(&chunk[pmgl_NextChunk]))
{
if (!(chunk = read_chunk(self, chm, fh, n))) {
err = self->error;
break;
}
/* search PMGL chunk. exit if file found */
if ((result = search_chunk(chm, chunk, filename, &p, &end)) > 0) {
break;
}
}
}
/* if we found a file, read it */
if (result > 0) {
READ_ENCINT(sec);
f_ptr->section = (sec == 0) ? (struct mschmd_section *) &chm->sec0
: (struct mschmd_section *) &chm->sec1;
READ_ENCINT(f_ptr->offset);
READ_ENCINT(f_ptr->length);
}
else if (result < 0) {
err = MSPACK_ERR_DATAFORMAT;
}
sys->close(fh);
return self->error = err;
chunk_end:
D(("read beyond end of chunk entries"))
sys->close(fh);
return self->error = MSPACK_ERR_DATAFORMAT;
}
/* reads the given chunk into memory, storing it in a chunk cache
* so it doesn't need to be read from disk more than once
*/
static unsigned char *read_chunk(struct mschm_decompressor_p *self,
struct mschmd_header *chm,
struct mspack_file *fh,
unsigned int chunk_num)
{
struct mspack_system *sys = self->system;
unsigned char *buf;
/* check arguments - most are already checked by chmd_fast_find */
if (chunk_num > chm->num_chunks) return NULL;
/* ensure chunk cache is available */
if (!chm->chunk_cache) {
size_t size = sizeof(unsigned char *) * chm->num_chunks;
if (!(chm->chunk_cache = (unsigned char **) sys->alloc(sys, size))) {
self->error = MSPACK_ERR_NOMEMORY;
return NULL;
}
memset(chm->chunk_cache, 0, size);
}
/* try to answer out of chunk cache */
if (chm->chunk_cache[chunk_num]) return chm->chunk_cache[chunk_num];
/* need to read chunk - allocate memory for it */
if (!(buf = (unsigned char *) sys->alloc(sys, chm->chunk_size))) {
self->error = MSPACK_ERR_NOMEMORY;
return NULL;
}
/* seek to block and read it */
if (sys->seek(fh, (off_t) (chm->dir_offset + (chunk_num * chm->chunk_size)),
MSPACK_SYS_SEEK_START))
{
self->error = MSPACK_ERR_SEEK;
sys->free(buf);
return NULL;
}
if (sys->read(fh, buf, (int)chm->chunk_size) != (int)chm->chunk_size) {
self->error = MSPACK_ERR_READ;
sys->free(buf);
return NULL;
}
/* check the signature. Is is PMGL or PMGI? */
if (!((buf[0] == 0x50) && (buf[1] == 0x4D) && (buf[2] == 0x47) &&
((buf[3] == 0x4C) || (buf[3] == 0x49))))
{
self->error = MSPACK_ERR_SEEK;
sys->free(buf);
return NULL;
}
/* all OK. Store chunk in cache and return it */
return chm->chunk_cache[chunk_num] = buf;
}
/* searches a PMGI/PMGL chunk for a given filename entry. Returns -1 on
* data format error, 0 if entry definitely not found, 1 if entry
* found. In the latter case, *result and *result_end are set pointing
* to that entry's data (either the "next chunk" ENCINT for a PMGI or
* the section, offset and length ENCINTs for a PMGL).
*
* In the case of PMGL chunks, the entry has definitely been
* found. In the case of PMGI chunks, the entry which points to the
* chunk that may eventually contain that entry has been found.
*/
/* Searches one PMGI or PMGL chunk for filename.
 *
 * chm        - header supplying chunk_size and the quickref density
 * chunk      - chunk data, chunk_size bytes, signature already verified
 * filename   - NUL-terminated name to look for
 * result     - receives a pointer to the data following the matched name
 * result_end - receives the end of the entry area (start of the quickrefs)
 *
 * Returns 1 if an entry was found, 0 if the name is definitely not in this
 * chunk, -1 if the chunk is malformed.
 *
 * Fixes over the previous revision: the quickref size is validated before
 * any pointer is derived from it, quickref offsets are range-checked before
 * being turned into pointers, and name lengths are compared against the
 * remaining space by subtraction, which cannot wrap the way
 * "p + name_len > end" could.
 */
static int search_chunk(struct mschmd_header *chm,
                        const unsigned char *chunk,
                        const char *filename,
                        const unsigned char **result,
                        const unsigned char **result_end)
{
    const unsigned char *start, *end, *p;
    unsigned int qr_size, num_entries, qr_entries, qr_density, name_len;
    unsigned int L, R, M, fname_len, entries_off, is_pmgl, qr_off, skip;
    int cmp;

    fname_len = strlen(filename);

    /* PMGL chunk or PMGI chunk? (note: read_chunk() has already
     * checked the rest of the characters in the chunk signature) */
    if (chunk[3] == 0x4C) {
        is_pmgl = 1;
        entries_off = pmgl_Entries;
    }
    else {
        is_pmgl = 0;
        entries_off = pmgi_Entries;
    }

    /*  Step 1: binary search first filename of each QR entry
     *  - target filename == entry
     *    found file
     *  - target filename < all entries
     *    file not found
     *  - target filename > all entries
     *    proceed to step 2 using final entry
     *  - target filename between two searched entries
     *    proceed to step 2
     */
    qr_size = EndGetI32(&chunk[pmgl_QuickRefSize]);
    if (qr_size > chm->chunk_size) {
        D(("quickref size > chunk size"))
        return -1;
    }

    /* "start" is the 16-bit entry count in the chunk's last two bytes; the
     * quickref offsets are read backwards from it. "end" is where the
     * quickref area begins, i.e. where the entries stop. */
    start = &chunk[chm->chunk_size - 2];
    end   = &chunk[chm->chunk_size - qr_size];
    num_entries = EndGetI16(start);
    if (num_entries == 0) {
        D(("chunk has no entries"))
        return -1;
    }

    qr_density = 1 + (1 << chm->density);
    qr_entries = (num_entries + qr_density-1) / qr_density;

    *result_end = end;

    if (((int)qr_entries * 2) > (start - end)) {
        D(("WARNING; more quickrefs than quickref space"))
        qr_entries = 0; /* but we can live with it */
    }

    if (qr_entries > 0) {
        L = 0;
        R = qr_entries - 1;
        do {
            /* pick new midpoint */
            M = (L + R) >> 1;

            /* compare filename with entry QR points to */
            qr_off = entries_off + (M ? EndGetI16(start - (M << 1)) : 0);
            if (qr_off > (unsigned int) (end - chunk)) goto chunk_end;
            p = &chunk[qr_off];
            READ_ENCINT(name_len);
            if (p > end || name_len > (unsigned int) (end - p)) goto chunk_end;
            cmp = compare(filename, (char *)p, fname_len, name_len);

            if (cmp == 0) break;
            else if (cmp < 0) { if (M) R = M - 1; else return 0; }
            else if (cmp > 0) L = M + 1;
        } while (L <= R);
        M = (L + R) >> 1;

        if (cmp == 0) {
            /* exact match! */
            p += name_len;
            *result = p;
            return 1;
        }

        /* otherwise, read the group of entries for QR entry M */
        qr_off = entries_off + (M ? EndGetI16(start - (M << 1)) : 0);
        if (qr_off > (unsigned int) (end - chunk)) goto chunk_end;
        p = &chunk[qr_off];
        num_entries -= (M * qr_density);
        if (num_entries > qr_density) num_entries = qr_density;
    }
    else {
        p = &chunk[entries_off];
    }

    /* Step 2: linear search through the set of entries reached in step 1.
     * - filename == any entry
     *   found entry
     * - filename < all entries (PMGI) or any entry (PMGL)
     *   entry not found, stop now
     * - filename > all entries
     *   entry not found (PMGL) / maybe found (PMGI)
     */
    *result = NULL;
    while (num_entries-- > 0) {
        READ_ENCINT(name_len);
        if (p > end || name_len > (unsigned int) (end - p)) goto chunk_end;
        cmp = compare(filename, (char *)p, fname_len, name_len);
        p += name_len;

        if (cmp == 0) {
            /* entry found */
            *result = p;
            return 1;
        }

        if (cmp < 0) {
            /* entry not found (PMGL) / maybe found (PMGI) */
            break;
        }

        /* read and ignore the rest of this entry */
        if (is_pmgl) {
            READ_ENCINT(skip); /* skip section */
            READ_ENCINT(skip); /* skip offset */
            READ_ENCINT(skip); /* skip length */
        }
        else {
            *result = p; /* store potential final result */
            READ_ENCINT(skip); /* skip chunk number */
        }
    }

    /* PMGL? not found. PMGI? maybe found */
    return (is_pmgl) ? 0 : (*result ? 1 : 0);

 chunk_end:
    D(("reached end of chunk data while searching"))
    return -1;
}
/* TOLOWER(x): lowercases a decoded character code. Uses towlower() or
 * tolower() when the build says they exist, otherwise a built-in table
 * covering codes 0..255; anything outside that range (including the -1
 * used for undecodable input) is returned unchanged.
 *
 * The table has 256 entries, so the upper limit must be 255; the previous
 * test "(x)>256" let code 256 index one element past the end.
 */
#if HAVE_TOWLOWER
# if HAVE_WCTYPE_H
#  include <wctype.h>
# endif
# define TOLOWER(x) towlower(x)
#elif HAVE_TOLOWER
# if HAVE_CTYPE_H
#  include <ctype.h>
# endif
# define TOLOWER(x) tolower(x)
#else
# define TOLOWER(x) (((x)<0||(x)>255)?(x):mspack_tolower_map[(x)])
/* Map of char -> lowercase char for the first 256 chars. Generated with:
 * LC_CTYPE=en_GB.utf-8 perl -Mlocale -le 'print map{ord(lc chr).","} 0..255'
 */
static const unsigned char mspack_tolower_map[256] = {
  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
  28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,
  53,54,55,56,57,58,59,60,61,62,63,64,97,98,99,100,101,102,103,104,105,106,
  107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,91,92,93,94,
  95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,
  115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,
  134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,
  153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,
  172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
  191,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,
  242,243,244,245,246,215,248,249,250,251,252,253,254,223,224,225,226,227,228,
  229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255
};
#endif
/* decodes a UTF-8 character from s[] into c. Will not read past e.
 *
 * s - read cursor; always advanced past the lead byte, and for 3- to 6-byte
 *     forms advanced by the full sequence length even when decoding fails,
 *     so it can end up beyond e (callers must compare with "<", not "!=")
 * e - end of the string (exclusive)
 * c - receives the code point, or -1 if the lead byte is a continuation
 *     byte (0x80-0xBF), is 0xFE/0xFF, or the sequence would run past e
 *
 * Lead bytes select 1- to 6-byte forms. Continuation bytes are only masked
 * with 0x3F; their top bits are not validated.
 *
 * The macro declares a local named "x" and evaluates its arguments several
 * times, so pass plain variables, none of them called x. */
#define GET_UTF8_CHAR(s, e, c) do { \
unsigned char x = *s++; \
if (x < 0x80) c = x; \
else if (x < 0xC0) c = -1; \
else if (x < 0xE0) { \
c = (s >= e) ? -1 : ((x & 0x1F) << 6) | (*s++ & 0x3F); \
} \
else if (x < 0xF0) { \
c = (s+2 > e) ? -1 : ((x & 0x0F) << 12) | ((s[0] & 0x3F) << 6) \
| (s[1] & 0x3F); \
s += 2; \
} \
else if (x < 0xF8) { \
c = (s+3 > e) ? -1 : ((x & 0x07) << 18) | ((s[0] & 0x3F) << 12) \
| ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); \
s += 3; \
} \
else if (x < 0xFC) { \
c = (s+4 > e) ? -1 : ((x & 0x03) << 24) | ((s[0] & 0x3F) << 18) \
| ((s[1] & 0x3F) << 12)|((s[2] & 0x3F) << 6)|(s[3] & 0x3F); \
s += 4; \
} \
else if (x < 0xFE) { \
c = (s+5>e)?-1:((x&1)<<30)|((s[0]&0x3F)<<24)|((s[1]&0x3F)<<18)| \
((s[2] & 0x3F) << 12) | ((s[3] & 0x3F) << 6)|(s[4] & 0x3F); \
s += 5; \
} \
else c = -1; \
} while (0)
/* case-insensitively compares two UTF8 encoded strings. String length for
* both strings must be provided, null bytes are not terminators */
/* Orders two UTF-8 strings ignoring case.
 *
 * s1, l1 - first string and its length in bytes
 * s2, l2 - second string and its length in bytes
 *
 * Characters are decoded pairwise; the first pair that still differs after
 * lowercasing decides the result (difference of the two lowercased codes).
 * If one string runs out first, the difference in byte lengths is returned.
 */
static inline int compare(const char *s1, const char *s2, int l1, int l2) {
    const unsigned char *a = (const unsigned char *) s1;
    const unsigned char *b = (const unsigned char *) s2;
    const unsigned char *a_end = a + l1;
    const unsigned char *b_end = b + l2;

    while (a < a_end && b < b_end) {
        int ca, cb;

        GET_UTF8_CHAR(a, a_end, ca);
        GET_UTF8_CHAR(b, b_end, cb);

        /* identical codes need no case folding */
        if (ca != cb) {
            ca = TOLOWER(ca);
            cb = TOLOWER(cb);
            if (ca != cb) return ca - cb;
        }
    }
    return l1 - l2;
}
/***************************************
* CHMD_EXTRACT
***************************************
* extracts a file from a CHM helpfile
*/
/* Extracts one file from a CHM into a newly opened output file.
 *
 * base     - the decompressor
 * file     - entry to extract; its section pointer identifies both the CHM
 *            and whether the data is stored (id 0) or LZX-compressed (id 1)
 * filename - name of the output file to open for writing
 *
 * Returns MSPACK_ERR_OK or an MSPACK_ERR_* code; except when base is NULL
 * the same value is stored in the decompressor's error field.
 *
 * The decompression state in self->d (input handle, LZX state, current
 * offset in the uncompressed stream) is kept between calls so that
 * consecutive files from the same CHM can reuse it.
 */
static int chmd_extract(struct mschm_decompressor *base,
struct mschmd_file *file, const char *filename)
{
struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
struct mspack_system *sys;
struct mschmd_header *chm;
struct mspack_file *fh;
off_t bytes;
if (!self) return MSPACK_ERR_ARGS;
if (!file || !file->section) return self->error = MSPACK_ERR_ARGS;
sys = self->system;
chm = file->section->chm;
/* create decompression state if it doesn't exist */
if (!self->d) {
self->d = (struct mschmd_decompress_state *) sys->alloc(sys, sizeof(struct mschmd_decompress_state));
if (!self->d) return self->error = MSPACK_ERR_NOMEMORY;
self->d->chm = chm;
self->d->offset = 0;
self->d->state = NULL;
/* private copy of the system whose write() is redirected, so that the
 * LZX decompressor's output passes through chmd_sys_write() */
self->d->sys = *sys;
self->d->sys.write = &chmd_sys_write;
self->d->infh = NULL;
self->d->outfh = NULL;
}
/* open input chm file if not open, or the open one is a different chm */
if (!self->d->infh || (self->d->chm != chm)) {
if (self->d->infh) sys->close(self->d->infh);
if (self->d->state) lzxd_free(self->d->state);
self->d->chm = chm;
self->d->offset = 0;
self->d->state = NULL;
self->d->infh = sys->open(sys, chm->filename, MSPACK_SYS_OPEN_READ);
if (!self->d->infh) return self->error = MSPACK_ERR_OPEN;
}
/* open file for output */
if (!(fh = sys->open(sys, filename, MSPACK_SYS_OPEN_WRITE))) {
return self->error = MSPACK_ERR_OPEN;
}
/* if file is empty, simply creating it is enough */
if (!file->length) {
sys->close(fh);
return self->error = MSPACK_ERR_OK;
}
self->error = MSPACK_ERR_OK;
/* a section id other than 0 or 1 matches no case: nothing is written and
 * the error stays MSPACK_ERR_OK */
switch (file->section->id) {
case 0: /* Uncompressed section file */
/* simple seek + copy */
if (sys->seek(self->d->infh, file->section->chm->sec0.offset
+ file->offset, MSPACK_SYS_SEEK_START))
{
self->error = MSPACK_ERR_SEEK;
}
else {
unsigned char buf[512];
off_t length = file->length;
while (length > 0) {
/* copy in runs of at most sizeof(buf) bytes */
int run = sizeof(buf);
if ((off_t)run > length) run = (int)length;
if (sys->read(self->d->infh, &buf[0], run) != run) {
self->error = MSPACK_ERR_READ;
break;
}
if (sys->write(fh, &buf[0], run) != run) {
self->error = MSPACK_ERR_WRITE;
break;
}
length -= run;
}
}
break;
case 1: /* MSCompressed section file */
/* (re)initialise compression state if it is not yet initialised,
* or we have advanced too far and have to backtrack
*/
if (!self->d->state || (file->offset < self->d->offset)) {
if (self->d->state) {
lzxd_free(self->d->state);
self->d->state = NULL;
}
if (chmd_init_decomp(self, file)) break;
}
/* seek to input data */
if (sys->seek(self->d->infh, self->d->inoffset, MSPACK_SYS_SEEK_START)) {
self->error = MSPACK_ERR_SEEK;
break;
}
/* get to correct offset. */
/* with outfh NULL, chmd_sys_write() discards the data but still counts it */
self->d->outfh = NULL;
if ((bytes = file->offset - self->d->offset)) {
self->error = lzxd_decompress(self->d->state, bytes);
}
/* if getting to the correct offset was error free, unpack file */
if (!self->error) {
self->d->outfh = fh;
self->error = lzxd_decompress(self->d->state, file->length);
}
/* save offset in input source stream, in case there is a section 0
* file between now and the next section 1 file extracted */
self->d->inoffset = sys->tell(self->d->infh);
/* if an LZX error occurred, the LZX decompressor is now useless */
if (self->error) {
if (self->d->state) lzxd_free(self->d->state);
self->d->state = NULL;
}
break;
}
sys->close(fh);
return self->error;
}
/***************************************
* CHMD_SYS_WRITE
***************************************
* chmd_sys_write is the internal writer function which the decompressor
* uses. It either writes data to disk (self->d->outfh) with the real
* sys->write() function, or does nothing with the data when
* self->d->outfh == NULL. advances self->d->offset.
*/
static int chmd_sys_write(struct mspack_file *file, void *buffer, int bytes) {
struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) file;
self->d->offset += bytes;
if (self->d->outfh) {
return self->system->write(self->d->outfh, buffer, bytes);
}
return bytes;
}
/***************************************
* CHMD_INIT_DECOMP
***************************************
* Initialises the LZX decompressor to decompress the compressed stream,
* from the nearest reset offset and length that is needed for the given
* file.
*/
/* Sets up self->d->state so that LZX decompression can start at the reset
 * point nearest to (and not after) the start of the given file.
 *
 * self - decompressor; self->d must already exist with an open input handle
 * file - a file in the MSCompressed section
 *
 * Returns MSPACK_ERR_OK or an MSPACK_ERR_* code, which is also stored in
 * self->error. On success self->d->inoffset, self->d->offset and
 * self->d->state are set.
 *
 * Fix over the previous revision: a reset interval of zero (or a negative
 * one) taken from ControlData is rejected; it used to reach the division
 * "file->offset / reset_interval".
 */
static int chmd_init_decomp(struct mschm_decompressor_p *self,
                            struct mschmd_file *file)
{
    int window_size, window_bits, reset_interval, entry, err;
    struct mspack_system *sys = self->system;
    struct mschmd_sec_mscompressed *sec;
    unsigned char *data;
    off_t length, offset;

    sec = (struct mschmd_sec_mscompressed *) file->section;

    /* ensure we have a mscompressed content section */
    err = find_sys_file(self, sec, &sec->content, content_name);
    if (err) return self->error = err;

    /* ensure we have a ControlData file */
    err = find_sys_file(self, sec, &sec->control, control_name);
    if (err) return self->error = err;

    /* read ControlData */
    if (sec->control->length < lzxcd_SIZEOF) {
        D(("ControlData file is too short"))
        return self->error = MSPACK_ERR_DATAFORMAT;
    }
    if (!(data = read_sys_file(self, sec->control))) {
        D(("can't read mscompressed control data file"))
        return self->error;
    }

    /* check LZXC signature */
    if (EndGetI32(&data[lzxcd_Signature]) != 0x43585A4C) {
        sys->free(data);
        return self->error = MSPACK_ERR_SIGNATURE;
    }

    /* read reset_interval and window_size and validate version number;
     * version 2 stores both in units of LZX frames rather than bytes */
    switch (EndGetI32(&data[lzxcd_Version])) {
    case 1:
        reset_interval = EndGetI32(&data[lzxcd_ResetInterval]);
        window_size    = EndGetI32(&data[lzxcd_WindowSize]);
        break;
    case 2:
        reset_interval = EndGetI32(&data[lzxcd_ResetInterval]) * LZX_FRAME_SIZE;
        window_size    = EndGetI32(&data[lzxcd_WindowSize])    * LZX_FRAME_SIZE;
        break;
    default:
        D(("bad controldata version"))
        sys->free(data);
        return self->error = MSPACK_ERR_DATAFORMAT;
    }

    /* free ControlData */
    sys->free(data);

    /* find window_bits from window_size */
    switch (window_size) {
    case 0x008000: window_bits = 15; break;
    case 0x010000: window_bits = 16; break;
    case 0x020000: window_bits = 17; break;
    case 0x040000: window_bits = 18; break;
    case 0x080000: window_bits = 19; break;
    case 0x100000: window_bits = 20; break;
    case 0x200000: window_bits = 21; break;
    default:
        D(("bad controldata window size"))
        return self->error = MSPACK_ERR_DATAFORMAT;
    }

    /* validate reset_interval: it is used as a divisor below, so it must be
     * positive, and it must be a whole number of LZX frames */
    if (reset_interval <= 0 || (reset_interval % LZX_FRAME_SIZE)) {
        D(("bad controldata reset interval"))
        return self->error = MSPACK_ERR_DATAFORMAT;
    }

    /* which reset table entry would we like? */
    entry = file->offset / reset_interval;
    /* convert from reset interval multiple (usually 64k) to 32k frames */
    entry *= reset_interval / LZX_FRAME_SIZE;

    /* read the reset table entry */
    if (read_reset_table(self, sec, entry, &length, &offset)) {
        /* the uncompressed length given in the reset table is dishonest.
         * the uncompressed data is always padded out from the given
         * uncompressed length up to the next reset interval */
        length += reset_interval - 1;
        length &= -reset_interval;
    }
    else {
        /* if we can't read the reset table entry, just start from
         * the beginning. Use spaninfo to get the uncompressed length */
        entry = 0;
        offset = 0;
        err = read_spaninfo(self, sec, &length);
        if (err) return self->error = err;
    }

    /* get offset of compressed data stream:
     * = offset of uncompressed section from start of file
     * + offset of compressed stream from start of uncompressed section
     * + offset of chosen reset interval from start of compressed stream */
    self->d->inoffset = file->section->chm->sec0.offset + sec->content->offset + offset;

    /* set start offset and overall remaining stream length */
    self->d->offset = entry * LZX_FRAME_SIZE;
    length -= self->d->offset;

    /* initialise LZX stream; the decompressor itself is passed as the
     * output handle, which chmd_sys_write() casts back */
    self->d->state = lzxd_init(&self->d->sys, self->d->infh,
                               (struct mspack_file *) self, window_bits,
                               reset_interval / LZX_FRAME_SIZE,
                               4096, length);
    if (!self->d->state) self->error = MSPACK_ERR_NOMEMORY;
    return self->error;
}
/***************************************
 * READ_RESET_TABLE
 ***************************************
 * Reads one entry out of the reset table, together with the uncompressed
 * length of the LZX stream stored in the reset table header.
 *
 * self       - decompressor state; the table is read via self->d->infh
 * sec        - MSCompressed section whose ResetTable system file is used
 * entry      - zero-based index of the wanted reset table entry
 * length_ptr - receives the uncompressed stream length
 * offset_ptr - receives the offset stored in the selected entry
 *
 * Returns non-zero for success, zero for failure. On failure, the values
 * left in *length_ptr and *offset_ptr must not be relied upon.
 *
 * Every field of the table comes from the file being opened, so all of
 * them are range-checked (in unsigned arithmetic) before being used to
 * index the in-memory copy of the table.
 */
static int read_reset_table(struct mschm_decompressor_p *self,
                            struct mschmd_sec_mscompressed *sec,
                            int entry, off_t *length_ptr, off_t *offset_ptr)
{
    /* The CHM ResetTable header stores the 64-bit uncompressed stream
     * length at byte 0x10, not at the start of the header.
     * NOTE(review): if this file's headers already define a constant for
     * that field (e.g. lzxrt_UncompLen), use it here instead. */
    enum { rt_uncomplen_offset = 0x10 };

    struct mspack_system *sys = self->system;
    unsigned char *data;
    unsigned int num_entries, entrysize, table_offset, table_len, pos;
    int err;

    /* do we have a ResetTable file? */
    if (find_sys_file(self, sec, &sec->rtable, rtable_name)) return 0;

    /* the header (including the length field) must be complete */
    if (sec->rtable->length < lzxrt_headerSIZEOF ||
        sec->rtable->length < rt_uncomplen_offset + 8)
    {
        D(("ResetTable file is too short"))
        return 0;
    }
    /* read_sys_file() reads an int-sized length; refuse anything larger so
     * the bounds checks below describe the buffer that was really read */
    if (sec->rtable->length > 0x7FFFFFFF) {
        D(("ResetTable file is too large"))
        return 0;
    }
    table_len = (unsigned int) sec->rtable->length;

    /* read ResetTable file */
    if (!(data = read_sys_file(self, sec->rtable))) {
        D(("can't read reset table"))
        return 0;
    }

    /* check sanity of reset table */
    if (EndGetI32(&data[lzxrt_FrameLen]) != LZX_FRAME_SIZE) {
        D(("bad reset table frame length"))
        sys->free(data);
        return 0;
    }

    /* get the uncompressed length of the LZX stream */
    if (read_off64(length_ptr, &data[rt_uncomplen_offset], sys, self->d->infh)) {
        sys->free(data);
        return 0;
    }
    /* a stream length that is not positive cannot be used by the caller */
    if (*length_ptr <= 0) {
        D(("bad reset table uncompressed length"))
        sys->free(data);
        return 0;
    }

    num_entries  = EndGetI32(&data[lzxrt_NumEntries]);
    entrysize    = EndGetI32(&data[lzxrt_EntrySize]);
    table_offset = EndGetI32(&data[lzxrt_TableOffset]);

    if (entrysize != 4 && entrysize != 8) {
        D(("reset table entry size neither 4 nor 8"))
        err = 1;
    }
    /* ensure reset table entry for this offset exists: the index must be
     * below the advertised entry count AND below the number of complete
     * entries that physically fit between table_offset and end of file */
    else if (entry < 0 || (unsigned int) entry >= num_entries ||
             table_offset > table_len ||
             (table_len - table_offset) / entrysize <= (unsigned int) entry)
    {
        D(("bad reset interval"))
        err = 1;
    }
    else {
        /* cannot overflow: pos + entrysize <= table_len <= 0x7FFFFFFF */
        pos = table_offset + ((unsigned int) entry * entrysize);
        if (entrysize == 4) {
            *offset_ptr = EndGetI32(&data[pos]);
            err = 0;
        }
        else {
            err = read_off64(offset_ptr, &data[pos], sys, self->d->infh);
        }
    }

    /* free the reset table */
    sys->free(data);

    /* return success */
    return (err == 0);
}
/***************************************
 * READ_SPANINFO
 ***************************************
 * Reads the uncompressed data length from the SpanInfo file.
 *
 * self       - decompressor state; the file is read via self->d->infh
 * sec        - MSCompressed section whose SpanInfo system file is used
 * length_ptr - receives the uncompressed length of the LZX stream
 *
 * Returns MSPACK_ERR_OK for success or a non-zero error code for failure:
 * MSPACK_ERR_DATAFORMAT if the file is missing, is not exactly 8 bytes
 * long, or holds an unusable length; otherwise whatever error
 * read_sys_file() left in self->error.
 */
static int read_spaninfo(struct mschm_decompressor_p *self,
                         struct mschmd_sec_mscompressed *sec,
                         off_t *length_ptr)
{
    struct mspack_system *sys = self->system;
    unsigned char *data;
    int err;

    /* find SpanInfo file */
    if (find_sys_file(self, sec, &sec->spaninfo, spaninfo_name)) {
        return MSPACK_ERR_DATAFORMAT;
    }

    /* SpanInfo holds a single 64-bit value, so it must be exactly 8 bytes */
    if (sec->spaninfo->length != 8) {
        D(("SpanInfo file is wrong size"))
        return MSPACK_ERR_DATAFORMAT;
    }

    /* read the SpanInfo file */
    if (!(data = read_sys_file(self, sec->spaninfo))) {
        D(("can't read SpanInfo file"))
        return self->error;
    }

    /* get the uncompressed length of the LZX stream */
    err = read_off64(length_ptr, data, sys, self->d->infh);
    sys->free(data);
    if (err) return MSPACK_ERR_DATAFORMAT;

    /* The value comes straight from the file. A zero or negative length
     * must not be handed on as the output length of the LZX stream. */
    if (*length_ptr <= 0) {
        D(("output length is invalid"))
        return MSPACK_ERR_DATAFORMAT;
    }

    return MSPACK_ERR_OK;
}
/***************************************
* FIND_SYS_FILE
***************************************
* Uses chmd_fast_find to locate a system file, and fills out that system
* file's entry and links it into the list of system files. Returns zero
* for success, non-zero for both failure and the file not existing.
*/
static int find_sys_file(struct mschm_decompressor_p *self,
struct mschmd_sec_mscompressed *sec,
struct mschmd_file **f_ptr, const char *name)
{
struct mspack_system *sys = self->system;
struct mschmd_file result;
/* already loaded */
if (*f_ptr) return MSPACK_ERR_OK;
/* try using fast_find to find the file - return DATAFORMAT error if
* it fails, or successfully doesn't find the file */
if (chmd_fast_find((struct mschm_decompressor *) self, sec->base.chm,
name, &result, (int)sizeof(result)) || !result.section)
{
return MSPACK_ERR_DATAFORMAT;
}
if (!(*f_ptr = (struct mschmd_file *) sys->alloc(sys, sizeof(result)))) {
return MSPACK_ERR_NOMEMORY;
}
/* copy result */
*(*f_ptr) = result;
(*f_ptr)->filename = (char *) name;
/* link file into sysfiles list */
(*f_ptr)->next = sec->base.chm->sysfiles;
sec->base.chm->sysfiles = *f_ptr;
return MSPACK_ERR_OK;
}
/***************************************
 * READ_SYS_FILE
 ***************************************
 * Allocates memory for a section 0 (uncompressed) file and reads it into
 * memory.
 *
 * self - decompressor state; data is read from self->d->infh
 * file - file entry to load; must belong to section 0
 *
 * Returns a buffer of file->length bytes which the caller must release
 * with sys->free(), or NULL on failure with self->error set to
 * MSPACK_ERR_DATAFORMAT (bad entry or unusable length),
 * MSPACK_ERR_NOMEMORY, MSPACK_ERR_SEEK or MSPACK_ERR_READ.
 */
static unsigned char *read_sys_file(struct mschm_decompressor_p *self,
                                    struct mschmd_file *file)
{
    struct mspack_system *sys = self->system;
    unsigned char *data = NULL;
    int len;

    if (!file || !file->section || (file->section->id != 0)) {
        self->error = MSPACK_ERR_DATAFORMAT;
        return NULL;
    }

    /* The length is narrowed to int for the allocation and the read below.
     * Reject values that would not survive that narrowing: a negative or
     * truncated length would otherwise size the buffer wrongly while
     * callers still trust the original file->length. */
    if (file->length < 0 || file->length > 0x7FFFFFFF) {
        self->error = MSPACK_ERR_DATAFORMAT;
        return NULL;
    }
    len = (int) file->length;

    if (!(data = (unsigned char *) sys->alloc(sys, (size_t) len))) {
        self->error = MSPACK_ERR_NOMEMORY;
        return NULL;
    }

    /* file offsets are relative to the start of section 0 */
    if (sys->seek(self->d->infh, file->section->chm->sec0.offset
                  + file->offset, MSPACK_SYS_SEEK_START))
    {
        self->error = MSPACK_ERR_SEEK;
        sys->free(data);
        return NULL;
    }

    /* a short read is treated as a read error */
    if (sys->read(self->d->infh, data, len) != len) {
        self->error = MSPACK_ERR_READ;
        sys->free(data);
        return NULL;
    }
    return data;
}
/***************************************
* CHMD_ERROR
***************************************
* returns the last error that occurred
*/
static int chmd_error(struct mschm_decompressor *base) {
struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
return (self) ? self->error : MSPACK_ERR_ARGS;
}
/***************************************
 * READ_OFF64
 ***************************************
 * Decodes a 64-bit signed integer stored in Intel (little-endian) byte
 * order at mem and stores it in *var.
 *
 * With LARGEFILE_SUPPORT defined, the full 64-bit value is stored and the
 * call always succeeds. Without it, only values up to 0x7FFFFFFF are
 * accepted; anything larger is reported through sys->message() on fh.
 * In that failing case *var has already been overwritten with the low
 * 32 bits.
 *
 * Returns zero for success, non-zero if the value is out of range.
 */
static int read_off64(off_t *var, unsigned char *mem,
                      struct mspack_system *sys, struct mspack_file *fh)
{
#ifndef LARGEFILE_SUPPORT
    unsigned char *upper_half = mem + 4;

    /* store the low 32 bits first; the range test below reads them back */
    *var = EndGetI32(mem);

    /* out of range if the low word has its top bit set or the high word
     * is not zero */
    if ((*var & 0x80000000) || EndGetI32(upper_half)) {
        sys->message(fh, (char *)largefile_msg);
        return 1;
    }
    return 0;
#else
    *var = EndGetI64(mem);
    return 0;
#endif
}