From 2d9c0bab540274e99480d965f38f266d20097976 Mon Sep 17 00:00:00 2001 From: Dominik Riebeling Date: Fri, 6 Apr 2012 20:21:29 +0200 Subject: [PATCH] Add support for cp1252 (Western European) codepage. In Europe Windows defaults to its own codepage cp1252 (also known as "WinLatin" or "Windows-1252"). cp1252 adds some characters to ISO-8859-1. Some mp3 tagging software on Windows uses cp1252 instead of ISO-8859-1. This violates the ID3 specification, which requires tags to be ISO-8859-1 or Unicode. However, similar violations are made for other codepages and supported by Rockbox using the "Default Codepage" setting. Add support for cp1252 to enable people using such broken tools to override the correct decoding to get their tags displayed properly. Change-Id: I9f2ec478afe2503e99ee8e6609416c92b0f453e0 Reviewed-on: http://gerrit.rockbox.org/209 Reviewed-by: Jens Arnold Tested-by: Jens Arnold --- apps/lang/english.lang | 14 ++++++++++++++ apps/settings_list.c | 9 ++++++--- firmware/common/unicode.c | 9 ++++++--- firmware/include/rbunicode.h | 2 ++ tools/codepage_tables.c | 7 +++++++ tools/codepage_tables.h | 1 + tools/codepages.c | 22 ++++++++++++++++------ 7 files changed, 52 insertions(+), 12 deletions(-) diff --git a/apps/lang/english.lang b/apps/lang/english.lang index cd575e9cfa..a70bb8b923 100644 --- a/apps/lang/english.lang +++ b/apps/lang/english.lang @@ -13038,3 +13038,17 @@ *: "Start Sleep Timer" </voice> </phrase> +<phrase> + id: LANG_CODEPAGE_WESTERN_EUROPEAN + desc: in codepage setting menu + user: core + <source> + *: "Western European (CP1252)" + </source> + <dest> + *: "Western European (CP1252)" + </dest> + <voice> + *: "Western European" + </voice> +</phrase> diff --git a/apps/settings_list.c b/apps/settings_list.c index c925b3273e..64bbe07ba4 100644 --- a/apps/settings_list.c +++ b/apps/settings_list.c @@ -1531,25 +1531,28 @@ const struct settings_list settings[] = { #ifdef HAVE_LCD_BITMAP /* The order must match with that in unicode.c */ "iso8859-1,iso8859-7,iso8859-8,cp1251,iso8859-11,cp1256," - 
"iso8859-9,iso8859-2,cp1250,sjis,gb2312,ksx1001,big5,utf-8", + "iso8859-9,iso8859-2,cp1250,cp1252,sjis,gb2312,ksx1001,big5,utf-8", - set_codepage, 14, + set_codepage, 15, - ID2P(LANG_CODEPAGE_LATIN1), ID2P(LANG_CODEPAGE_GREEK), + ID2P(LANG_CODEPAGE_LATIN1), + ID2P(LANG_CODEPAGE_GREEK), ID2P(LANG_CODEPAGE_HEBREW), ID2P(LANG_CODEPAGE_CYRILLIC), ID2P(LANG_CODEPAGE_THAI), ID2P(LANG_CODEPAGE_ARABIC), ID2P(LANG_CODEPAGE_TURKISH), ID2P(LANG_CODEPAGE_LATIN_EXTENDED), ID2P(LANG_CODEPAGE_CENTRAL_EUROPEAN), + ID2P(LANG_CODEPAGE_WESTERN_EUROPEAN), ID2P(LANG_CODEPAGE_JAPANESE), ID2P(LANG_CODEPAGE_SIMPLIFIED), ID2P(LANG_CODEPAGE_KOREAN), ID2P(LANG_CODEPAGE_TRADITIONAL), ID2P(LANG_CODEPAGE_UTF8)), #else /* !HAVE_LCD_BITMAP */ /* The order must match with that in unicode.c */ - "iso8859-1,iso8859-7,cp1251,iso8859-9,iso8859-2,cp1250,utf-8", + "iso8859-1,iso8859-7,cp1251,iso8859-9,iso8859-2,cp1250,cp1252,utf-8", - set_codepage, 7, + set_codepage, 8, ID2P(LANG_CODEPAGE_LATIN1), ID2P(LANG_CODEPAGE_GREEK), ID2P(LANG_CODEPAGE_CYRILLIC), ID2P(LANG_CODEPAGE_TURKISH), ID2P(LANG_CODEPAGE_LATIN_EXTENDED), ID2P(LANG_CODEPAGE_CENTRAL_EUROPEAN), + ID2P(LANG_CODEPAGE_WESTERN_EUROPEAN), ID2P(LANG_CODEPAGE_UTF8)), #endif OFFON_SETTING(0, warnon_erase_dynplaylist, LANG_WARN_ERASEDYNPLAYLIST_MENU, diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c index 25d4a9129e..3ad63ee4fb 100644 --- a/firmware/common/unicode.c +++ b/firmware/common/unicode.c @@ -56,7 +56,7 @@ static const char * const filename[NUM_TABLES] = static const char cp_2_table[NUM_CODEPAGES] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 0 }; static const char * const name_codepages[NUM_CODEPAGES+1] = @@ -70,6 +70,7 @@ static const char * const name_codepages[NUM_CODEPAGES+1] = "ISO-8859-9", "ISO-8859-2", "CP1250", + "CP1252", "SJIS", "GB-2312", "KSX-1001", @@ -80,7 +81,7 @@ static const char * const name_codepages[NUM_CODEPAGES+1] = #else /* !HAVE_LCD_BITMAP, reduced support */ -#define MAX_CP_TABLE_SIZE 640 +#define 
MAX_CP_TABLE_SIZE 768 #define NUM_TABLES 1 static const char * const filename[NUM_TABLES] = { @@ -89,7 +90,7 @@ static const char * const filename[NUM_TABLES] = { static const char cp_2_table[NUM_CODEPAGES] = { - 0, 1, 1, 1, 1, 1, 0 + 0, 1, 1, 1, 1, 1, 1, 0 }; static const char * const name_codepages[NUM_CODEPAGES+1] = @@ -100,6 +101,7 @@ static const char * const name_codepages[NUM_CODEPAGES+1] = "ISO-8859-9", "ISO-8859-2", "CP1250", + "CP1252", "UTF-8", "unknown" }; @@ -190,6 +192,7 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, /* cp tells us which codepage to convert from */ switch (cp) { case ISO_8859_7: /* Greek */ + case WIN_1252: /* Western European */ case WIN_1251: /* Cyrillic */ case ISO_8859_9: /* Turkish */ case ISO_8859_2: /* Latin Extended */ diff --git a/firmware/include/rbunicode.h b/firmware/include/rbunicode.h index a97ebb469b..d21a840b00 100644 --- a/firmware/include/rbunicode.h +++ b/firmware/include/rbunicode.h @@ -45,6 +45,7 @@ enum codepages { ISO_8859_9, /* Turkish */ ISO_8859_2, /* Latin Extended */ WIN_1250, /* Central European */ + WIN_1252, /* Western European */ SJIS, /* Japanese */ GB_2312, /* Simp. 
Chinese */ KSX_1001, /* Korean */ @@ -62,6 +63,7 @@ enum codepages { ISO_8859_9, /* Turkish */ ISO_8859_2, /* Latin Extended */ WIN_1250, /* Central European */ + WIN_1252, /* Western European */ UTF_8, /* Unicode */ NUM_CODEPAGES }; diff --git a/tools/codepage_tables.c b/tools/codepage_tables.c index 87e0da81a2..3f04c9df3e 100644 --- a/tools/codepage_tables.c +++ b/tools/codepage_tables.c @@ -57,6 +57,13 @@ const unsigned short cp1251_to_uni[] = { 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457 /* B8-BF */ }; +const unsigned short cp1252_to_uni[] = { + 0x20AC, 0x00A0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, /* 80-87 */ + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x00a0, 0x017D, 0x00a0, /* 88-8F */ + 0x00A0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, /* 90-97 */ + 0x02dc, 0x2122, 0x0161, 0x203A, 0x0153, 0x00a0, 0x017E, 0x0178 /* 98-9F */ +}; + const unsigned short cp1256_to_uni[] = { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688, diff --git a/tools/codepage_tables.h b/tools/codepage_tables.h index 5c74baa38f..208a4c9237 100644 --- a/tools/codepage_tables.h +++ b/tools/codepage_tables.h @@ -22,6 +22,7 @@ extern const unsigned short iso8859_7_to_uni[]; extern const unsigned short cp1250_to_uni[]; extern const unsigned short cp1251_to_uni[]; +extern const unsigned short cp1252_to_uni[]; extern const unsigned short cp1256_to_uni[]; extern const unsigned short iso8859_2_to_uni[]; extern const unsigned short cp932_table[]; diff --git a/tools/codepages.c b/tools/codepages.c index 9c214397de..fb01c4dfb3 100644 --- a/tools/codepages.c +++ b/tools/codepages.c @@ -25,8 +25,8 @@ #define MAX_TABLE_SIZE 32768 -static const int mini_index[6] = { - 0, 1, 3, 6, 7, 8 +static const int mini_index[7] = { + 0, 1, 3, 6, 7, 8, 9 }; static unsigned short iso_table[MAX_TABLE_SIZE]; @@ -140,7 +140,7 @@ unsigned short iso_decode(unsigned char *latin1, int cp, int count) ucs = 
iso8859_2_to_uni[*latin1++ - 0xA1]; } break; - + case 0x08: /* Central European (CP1250) */ while (count--) { /* first convert to unicode */ @@ -150,7 +150,17 @@ unsigned short iso_decode(unsigned char *latin1, int cp, int count) ucs = cp1250_to_uni[*latin1++ - 0x80]; } break; - + + case 0x09: /* Western European (CP1252) */ + while (count--) { + /* first convert to unicode */ + if (*latin1 < 0x80 || *latin1 >= 0xa0) + ucs = *latin1++; + else + ucs = cp1252_to_uni[*latin1++ - 0x80]; + } + break; + default: break; } @@ -209,7 +219,7 @@ int main(int argc, char **argv) of = fopen("isomini.cp", "wb"); if (!of) return 1; - for (i=1; i<6; i++) { + for (i=1; i<7; i++) { for (j=0; j<128; j++) { k = (unsigned char)j + 128; @@ -223,7 +233,7 @@ int main(int argc, char **argv) of = fopen("iso.cp", "wb"); if (!of) return 1; - for (i=1; i<9; i++) { + for (i=1; i<10; i++) { for (j=0; j<128; j++) { k = (unsigned char)j + 128;