Cleaned up codepage handling, by Yoshihisa Uchida. (FS#9349)

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19339 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Björn Stenberg 2008-12-04 22:00:12 +00:00
parent 38e97057a9
commit af078b4f86
5 changed files with 119 additions and 49 deletions

View file

@ -620,6 +620,7 @@ static const struct plugin_api rockbox_api = {
/* new stuff at the end, sort into place next time /* new stuff at the end, sort into place next time
the API gets incompatible */ the API gets incompatible */
get_settings_list, get_settings_list,
get_codepage_name,
}; };
int plugin_load(const char* plugin, const void* parameter) int plugin_load(const char* plugin, const void* parameter)

View file

@ -131,7 +131,7 @@ void* plugin_get_buffer(size_t *buffer_size);
#define PLUGIN_MAGIC 0x526F634B /* RocK */ #define PLUGIN_MAGIC 0x526F634B /* RocK */
/* increase this every time the api struct changes */ /* increase this every time the api struct changes */
#define PLUGIN_API_VERSION 127 #define PLUGIN_API_VERSION 128
/* update this to latest version if a change to the api struct breaks /* update this to latest version if a change to the api struct breaks
backwards compatibility (and please take the opportunity to sort in any backwards compatibility (and please take the opportunity to sort in any
@ -783,7 +783,7 @@ struct plugin_api {
/* new stuff at the end, sort into place next time /* new stuff at the end, sort into place next time
the API gets incompatible */ the API gets incompatible */
const struct settings_list* (*get_settings_list)(int*count); const struct settings_list* (*get_settings_list)(int*count);
const char* (*get_codepage_name)(int cp);
}; };
/* plugin header */ /* plugin header */

View file

@ -348,23 +348,7 @@ struct preferences {
WIDE, WIDE,
} view_mode; } view_mode;
enum { enum codepages encoding;
ISO_8859_1=0,
ISO_8859_7,
ISO_8859_8,
CP1251,
ISO_8859_11,
ISO_8859_6,
ISO_8859_9,
ISO_8859_2,
CP1250,
SJIS,
GB2312,
KSX1001,
BIG5,
UTF8,
ENCODINGS
} encoding; /* FIXME: What should default encoding be? */
#ifdef HAVE_LCD_BITMAP #ifdef HAVE_LCD_BITMAP
enum { enum {
@ -433,16 +417,18 @@ unsigned char* get_ucs(const unsigned char* str, unsigned short* ch)
unsigned char utf8_tmp[6]; unsigned char utf8_tmp[6];
int count; int count;
if (prefs.encoding == UTF8) if (prefs.encoding == UTF_8)
return (unsigned char*)rb->utf8decode(str, ch); return (unsigned char*)rb->utf8decode(str, ch);
count = BUFFER_OOB(str+2)? 1:2; count = BUFFER_OOB(str+2)? 1:2;
rb->iso_decode(str, utf8_tmp, prefs.encoding, count); rb->iso_decode(str, utf8_tmp, prefs.encoding, count);
rb->utf8decode(utf8_tmp, ch); rb->utf8decode(utf8_tmp, ch);
#ifdef HAVE_LCD_BITMAP
if ((prefs.encoding == SJIS && *str > 0xA0 && *str < 0xE0) || prefs.encoding < SJIS) if ((prefs.encoding == SJIS && *str > 0xA0 && *str < 0xE0) || prefs.encoding < SJIS)
return (unsigned char*)str+1; return (unsigned char*)str+1;
else else
#endif
return (unsigned char*)str+2; return (unsigned char*)str+2;
} }
@ -1330,22 +1316,14 @@ static int col_limit(int col)
static bool encoding_setting(void) static bool encoding_setting(void)
{ {
static const struct opt_items names[] = { static struct opt_items names[NUM_CODEPAGES];
{"ISO-8859-1", -1}, int idx;
{"ISO-8859-7", -1},
{"ISO-8859-8", -1}, for (idx = 0; idx < NUM_CODEPAGES; idx++)
{"CP1251", -1}, {
{"ISO-8859-11", -1}, names[idx].string = rb->get_codepage_name(idx);
{"ISO-8859-6", -1}, names[idx].voice_id = -1;
{"ISO-8859-9", -1}, }
{"ISO-8859-2", -1},
{"CP1250", -1},
{"SJIS", -1},
{"GB-2312", -1},
{"KSX-1001", -1},
{"BIG5", -1},
{"UTF-8", -1},
};
return rb->set_option("Encoding", &prefs.encoding, INT, names, return rb->set_option("Encoding", &prefs.encoding, INT, names,
sizeof(names) / sizeof(names[0]), NULL); sizeof(names) / sizeof(names[0]), NULL);

View file

@ -1,6 +1,24 @@
/* Some conversion functions for handling UTF-8 /***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
* *
* copyright Marcoen Hirschberg (2004,2005) * Copyright (c) 2004,2005 by Marcoen Hirschberg
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* Some conversion functions for handling UTF-8
* *
* I got all the info from: * I got all the info from:
* http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 * http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
@ -27,11 +45,6 @@ static int loaded_cp_table = 0;
#define MAX_CP_TABLE_SIZE 32768 #define MAX_CP_TABLE_SIZE 32768
#define NUM_TABLES 5 #define NUM_TABLES 5
enum {
ISO_8859_1 = 0, ISO_8859_7, ISO_8859_8, WIN_1251,
ISO_8859_11, WIN_1256, ISO_8859_9, ISO_8859_2, WIN_1250,
SJIS, GB_2312, KSX_1001, BIG_5, UTF_8, NUM_CODEPAGES
};
static const char *filename[NUM_TABLES] = static const char *filename[NUM_TABLES] =
{ {
CODEPAGE_DIR"/iso.cp", CODEPAGE_DIR"/iso.cp",
@ -40,29 +53,58 @@ static const char *filename[NUM_TABLES] =
CODEPAGE_DIR"/949.cp", /* KSX1001 */ CODEPAGE_DIR"/949.cp", /* KSX1001 */
CODEPAGE_DIR"/950.cp" /* BIG5 */ CODEPAGE_DIR"/950.cp" /* BIG5 */
}; };
static const char cp_2_table[NUM_CODEPAGES] = static const char cp_2_table[NUM_CODEPAGES] =
{ {
0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0
}; };
/* Display names of the supported codepages, listed in the same order as
 * the enum codepages values used to index this table. The extra final
 * entry ("unknown") is what get_codepage_name() returns for a codepage
 * number outside 0..NUM_CODEPAGES-1. */
static const char *name_codepages[NUM_CODEPAGES+1] =
{
"ISO-8859-1",
"ISO-8859-7",
"ISO-8859-8",
"CP1251",
"ISO-8859-11",
"CP1256",
"ISO-8859-9",
"ISO-8859-2",
"CP1250",
"SJIS",
"GB-2312",
"KSX-1001",
"BIG5",
"UTF-8",
"unknown"
};
#else /* !HAVE_LCD_BITMAP, reduced support */ #else /* !HAVE_LCD_BITMAP, reduced support */
#define MAX_CP_TABLE_SIZE 640 #define MAX_CP_TABLE_SIZE 640
#define NUM_TABLES 1 #define NUM_TABLES 1
enum {
ISO_8859_1 = 0, ISO_8859_7, WIN_1251, ISO_8859_9,
ISO_8859_2, WIN_1250, UTF_8, NUM_CODEPAGES
};
static const char *filename[NUM_TABLES] = static const char *filename[NUM_TABLES] =
{ {
CODEPAGE_DIR"/isomini.cp", CODEPAGE_DIR"/isomini.cp",
}; };
static const char cp_2_table[NUM_CODEPAGES] = static const char cp_2_table[NUM_CODEPAGES] =
{ {
0, 1, 1, 1, 1, 1, 0 0, 1, 1, 1, 1, 1, 0
}; };
/* Display names of the codepages available in the reduced
 * (!HAVE_LCD_BITMAP) build, listed in the same order as the enum codepages
 * values used to index this table. The extra final entry ("unknown") is
 * what get_codepage_name() returns for an out-of-range codepage number. */
static const char *name_codepages[NUM_CODEPAGES+1] =
{
"ISO-8859-1",
"ISO-8859-7",
"CP1251",
"ISO-8859-9",
"ISO-8859-2",
"CP1250",
"UTF-8",
"unknown"
};
#endif #endif
static unsigned short codepage_table[MAX_CP_TABLE_SIZE]; static unsigned short codepage_table[MAX_CP_TABLE_SIZE];
@ -344,3 +386,10 @@ int utf8seek(const unsigned char* utf8, int offset)
} }
return pos; return pos;
} }
/* Return the display name of codepage number cp.
 *
 * Any value outside 0..NUM_CODEPAGES-1 selects the extra trailing entry of
 * name_codepages, so the result is always a valid string. */
const char* get_codepage_name(int cp)
{
    const int slot = (cp >= 0 && cp < NUM_CODEPAGES) ? cp : NUM_CODEPAGES;

    return name_codepages[slot];
}

View file

@ -1,16 +1,56 @@
/* Some conversion functions for handling UTF-8 /***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
* *
* copyright Marcoen Hirschberg (2004,2005) * Copyright (c) 2004,2005 by Marcoen Hirschberg
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* Some conversion functions for handling UTF-8
* *
* I got all the info from: * I got all the info from:
* http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 * http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
* and * and
* http://en.wikipedia.org/wiki/Unicode * http://en.wikipedia.org/wiki/Unicode
*/ */
#ifndef _RBUNICODE_H_
#define _RBUNICODE_H_
#ifndef __PCTOOL__
#include "config.h"
#endif
#define MASK 0xC0 /* 11000000 */ #define MASK 0xC0 /* 11000000 */
#define COMP 0x80 /* 10x */ #define COMP 0x80 /* 10x */
/* Codepage identifiers. The set depends on HAVE_LCD_BITMAP: the reduced
 * build omits ISO_8859_8, ISO_8859_11, WIN_1256, SJIS, GB_2312, KSX_1001
 * and BIG_5. NUM_CODEPAGES is the last enumerator and therefore equals the
 * number of codepages in the active variant. The name_codepages table is
 * indexed by these values, so keep the two in the same order. */
#ifdef HAVE_LCD_BITMAP
enum codepages {
ISO_8859_1 = 0, ISO_8859_7, ISO_8859_8, WIN_1251,
ISO_8859_11, WIN_1256, ISO_8859_9, ISO_8859_2, WIN_1250,
SJIS, GB_2312, KSX_1001, BIG_5, UTF_8, NUM_CODEPAGES
};
#else /* !HAVE_LCD_BITMAP, reduced support */
enum codepages {
ISO_8859_1 = 0, ISO_8859_7, WIN_1251, ISO_8859_9,
ISO_8859_2, WIN_1250, UTF_8, NUM_CODEPAGES
};
#endif
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */ /* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8); unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8);
@ -21,3 +61,5 @@ unsigned long utf8length(const unsigned char *utf8);
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs); const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs);
void set_codepage(int cp); void set_codepage(int cp);
int utf8seek(const unsigned char* utf8, int offset); int utf8seek(const unsigned char* utf8, int offset);
/* Return the display name for codepage number cp (an enum codepages value);
 * out-of-range values yield the "unknown" entry rather than NULL. */
const char* get_codepage_name(int cp);
#endif /* _RBUNICODE_H_ */