7682cb5ca8
This commit corrects the display of diacritic characters, which exist in many languages. Hopefully, it will make Rockbox much more usable for users of these languages. Diacritic information (which is used to decide whether a given character is a diacritic or not) is taken from the Unicode Standard, Version 5.2. This feature does not affect drawing performance much, as the diacritic database is cached (simple MRU mechanism). There may be room for further improvements in performance, footprint, and code reuse that could be worked on in the future. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@23742 a1c6a512-1295-4272-9138-f99709370657
378 lines
12 KiB
C
378 lines
12 KiB
C
/***************************************************************************
|
|
* __________ __ ___.
|
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
* \/ \/ \/ \/ \/
|
|
* $Id$
|
|
*
|
|
* Copyright (C) 2009 Phinitnun Chanasabaeng
|
|
* Initial work
|
|
* Copyright (C) 2009 Tomer Shalev
|
|
*
|
|
* Rockbox diacritic positioning
|
|
* Based on initial work by Phinitnun Chanasabaeng
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
* KIND, either express or implied.
|
|
*
|
|
****************************************************************************/
|
|
#include "diacritic.h"
|
|
#include "system.h"
|
|
|
|
/* Flag bits stored above the code point in every diac_range[] entry.
 * They are built from unsigned constants: with a 32-bit int, (1 << 31)
 * shifts into the sign bit, which is undefined behaviour in C. */
#define DIAC_FLAG_DIACRITIC (1u << 31)
#define DIAC_FLAG_RTL       (1u << 30)
/* The low 20 bits of an entry hold its Unicode value */
#define DIAC_MASK           0x000FFFFF

/* Accessors for entry i of diac_range[] */
#define DIAC_VAL(i)          (diac_range[(i)] & DIAC_MASK)
#define DIAC_IS_DIACRITIC(i) ((diac_range[(i)] & DIAC_FLAG_DIACRITIC) ? 1 : 0)
#define DIAC_IS_RTL(i)       ((diac_range[(i)] & DIAC_FLAG_RTL) ? 1 : 0)
/* Number of entries in diac_range[] */
#define DIAC_NUM_RANGES      (ARRAYLEN(diac_range))
|
|
|
|
/* Sorted by Unicode value */
|
|
static const int diac_range[] =
|
|
{
|
|
0x00000,
|
|
0x002ff,
|
|
0x0036f | DIAC_FLAG_DIACRITIC, /* Combining Diacritical Marks */
|
|
0x00482,
|
|
0x00489 | DIAC_FLAG_DIACRITIC, /* Cyrillic */
|
|
0x00590,
|
|
0x005bd | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Hebrew */
|
|
0x005be | DIAC_FLAG_RTL,
|
|
0x005bf | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Hebrew */
|
|
0x005c0 | DIAC_FLAG_RTL,
|
|
0x005c2 | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Hebrew */
|
|
0x005c3 | DIAC_FLAG_RTL,
|
|
0x005c5 | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Hebrew */
|
|
0x005c6 | DIAC_FLAG_RTL,
|
|
0x005c7 | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Hebrew */
|
|
0x0060f | DIAC_FLAG_RTL,
|
|
0x0061a | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Arabic */
|
|
0x0064a | DIAC_FLAG_RTL,
|
|
0x0065e | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Arabic */
|
|
0x0066f | DIAC_FLAG_RTL,
|
|
0x00670 | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Arabic */
|
|
0x006d5 | DIAC_FLAG_RTL,
|
|
0x006dc | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Arabic */
|
|
0x006de | DIAC_FLAG_RTL,
|
|
0x006e4 | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Arabic */
|
|
0x006e6 | DIAC_FLAG_RTL,
|
|
0x006e8 | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Arabic */
|
|
0x006e9 | DIAC_FLAG_RTL,
|
|
0x006ed | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Arabic */
|
|
0x00710 | DIAC_FLAG_RTL,
|
|
0x00711 | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Syriac */
|
|
0x0072f | DIAC_FLAG_RTL,
|
|
0x0074a | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Syriac */
|
|
0x007a5 | DIAC_FLAG_RTL,
|
|
0x007b0 | DIAC_FLAG_DIACRITIC | DIAC_FLAG_RTL, /* Thaana */
|
|
0x007c0 | DIAC_FLAG_RTL,
|
|
0x007ea,
|
|
0x007f3 | DIAC_FLAG_DIACRITIC, /* NKo */
|
|
0x00815,
|
|
0x00819 | DIAC_FLAG_DIACRITIC, /* Samaritan */
|
|
0x0081a,
|
|
0x00823 | DIAC_FLAG_DIACRITIC, /* Samaritan */
|
|
0x00824,
|
|
0x00827 | DIAC_FLAG_DIACRITIC, /* Samaritan */
|
|
0x00828,
|
|
0x0082d | DIAC_FLAG_DIACRITIC, /* Samaritan */
|
|
0x008ff,
|
|
0x00903 | DIAC_FLAG_DIACRITIC, /* Devanagari */
|
|
0x0093b,
|
|
0x0093c | DIAC_FLAG_DIACRITIC, /* Devanagari */
|
|
0x0093d,
|
|
0x0094e | DIAC_FLAG_DIACRITIC, /* Devanagari */
|
|
0x00950,
|
|
0x00955 | DIAC_FLAG_DIACRITIC, /* Devanagari */
|
|
0x00961,
|
|
0x00963 | DIAC_FLAG_DIACRITIC, /* Devanagari */
|
|
0x00980,
|
|
0x00983 | DIAC_FLAG_DIACRITIC, /* Bengali */
|
|
0x009bb,
|
|
0x009bc | DIAC_FLAG_DIACRITIC, /* Bengali */
|
|
0x009bd,
|
|
0x009cd | DIAC_FLAG_DIACRITIC, /* Bengali */
|
|
0x009d6,
|
|
0x009d7 | DIAC_FLAG_DIACRITIC, /* Bengali */
|
|
0x009e1,
|
|
0x009e3 | DIAC_FLAG_DIACRITIC, /* Bengali */
|
|
0x00a00,
|
|
0x00a03 | DIAC_FLAG_DIACRITIC, /* Gurmukhi */
|
|
0x00a3b,
|
|
0x00a51 | DIAC_FLAG_DIACRITIC, /* Gurmukhi */
|
|
0x00a6f,
|
|
0x00a71 | DIAC_FLAG_DIACRITIC, /* Gurmukhi */
|
|
0x00a74,
|
|
0x00a75 | DIAC_FLAG_DIACRITIC, /* Gurmukhi */
|
|
0x00a80,
|
|
0x00a83 | DIAC_FLAG_DIACRITIC, /* Gujarati */
|
|
0x00abb,
|
|
0x00abc | DIAC_FLAG_DIACRITIC, /* Gujarati */
|
|
0x00abd,
|
|
0x00acd | DIAC_FLAG_DIACRITIC, /* Gujarati */
|
|
0x00ae1,
|
|
0x00ae3 | DIAC_FLAG_DIACRITIC, /* Gujarati */
|
|
0x00b00,
|
|
0x00b03 | DIAC_FLAG_DIACRITIC, /* Oriya */
|
|
0x00b3b,
|
|
0x00b3c | DIAC_FLAG_DIACRITIC, /* Oriya */
|
|
0x00b3d,
|
|
0x00b57 | DIAC_FLAG_DIACRITIC, /* Oriya */
|
|
0x00b81,
|
|
0x00b82 | DIAC_FLAG_DIACRITIC, /* Tamil */
|
|
0x00bbd,
|
|
0x00bcd | DIAC_FLAG_DIACRITIC, /* Tamil */
|
|
0x00bd6,
|
|
0x00bd7 | DIAC_FLAG_DIACRITIC, /* Tamil */
|
|
0x00c00,
|
|
0x00c03 | DIAC_FLAG_DIACRITIC, /* Telugu */
|
|
0x00c3d,
|
|
0x00c56 | DIAC_FLAG_DIACRITIC, /* Telugu */
|
|
0x00c61,
|
|
0x00c63 | DIAC_FLAG_DIACRITIC, /* Telugu */
|
|
0x00c81,
|
|
0x00c83 | DIAC_FLAG_DIACRITIC, /* Kannada */
|
|
0x00cbb,
|
|
0x00cbc | DIAC_FLAG_DIACRITIC, /* Kannada */
|
|
0x00cbd,
|
|
0x00cd6 | DIAC_FLAG_DIACRITIC, /* Kannada */
|
|
0x00ce1,
|
|
0x00ce3 | DIAC_FLAG_DIACRITIC, /* Kannada */
|
|
0x00d01,
|
|
0x00d03 | DIAC_FLAG_DIACRITIC, /* Malayalam */
|
|
0x00d3d,
|
|
0x00d57 | DIAC_FLAG_DIACRITIC, /* Malayalam */
|
|
0x00d61,
|
|
0x00d63 | DIAC_FLAG_DIACRITIC, /* Malayalam */
|
|
0x00d81,
|
|
0x00d83 | DIAC_FLAG_DIACRITIC, /* Sinhala */
|
|
0x00dc9,
|
|
0x00df3 | DIAC_FLAG_DIACRITIC, /* Sinhala */
|
|
0x00e30,
|
|
0x00e31 | DIAC_FLAG_DIACRITIC, /* Thai */
|
|
0x00e33,
|
|
0x00e3a | DIAC_FLAG_DIACRITIC, /* Thai */
|
|
0x00e46,
|
|
0x00e4e | DIAC_FLAG_DIACRITIC, /* Thai */
|
|
0x00eb0,
|
|
0x00eb1 | DIAC_FLAG_DIACRITIC, /* Lao */
|
|
0x00eb3,
|
|
0x00ebc | DIAC_FLAG_DIACRITIC, /* Lao */
|
|
0x00ec7,
|
|
0x00ecd | DIAC_FLAG_DIACRITIC, /* Lao */
|
|
0x00f17,
|
|
0x00f19 | DIAC_FLAG_DIACRITIC, /* Tibetan */
|
|
0x00f34,
|
|
0x00f35 | DIAC_FLAG_DIACRITIC, /* Tibetan */
|
|
0x00f36,
|
|
0x00f37 | DIAC_FLAG_DIACRITIC, /* Tibetan */
|
|
0x00f38,
|
|
0x00f39 | DIAC_FLAG_DIACRITIC, /* Tibetan */
|
|
0x00f3d,
|
|
0x00f3f | DIAC_FLAG_DIACRITIC, /* Tibetan */
|
|
0x00f70,
|
|
0x00f84 | DIAC_FLAG_DIACRITIC, /* Tibetan */
|
|
0x00f85,
|
|
0x00f87 | DIAC_FLAG_DIACRITIC, /* Tibetan */
|
|
0x00f8f,
|
|
0x00fbc | DIAC_FLAG_DIACRITIC, /* Tibetan */
|
|
0x0102a,
|
|
0x0103e | DIAC_FLAG_DIACRITIC, /* Myanmar */
|
|
0x01055,
|
|
0x01059 | DIAC_FLAG_DIACRITIC, /* Myanmar */
|
|
0x0105d,
|
|
0x01060 | DIAC_FLAG_DIACRITIC, /* Myanmar */
|
|
0x01061,
|
|
0x01064 | DIAC_FLAG_DIACRITIC, /* Myanmar */
|
|
0x01066,
|
|
0x0106d | DIAC_FLAG_DIACRITIC, /* Myanmar */
|
|
0x01070,
|
|
0x01074 | DIAC_FLAG_DIACRITIC, /* Myanmar */
|
|
0x01081,
|
|
0x0108d | DIAC_FLAG_DIACRITIC, /* Myanmar */
|
|
0x0108e,
|
|
0x0108f | DIAC_FLAG_DIACRITIC, /* Myanmar */
|
|
0x01099,
|
|
0x0109d | DIAC_FLAG_DIACRITIC, /* Myanmar */
|
|
0x0135e,
|
|
0x0135f | DIAC_FLAG_DIACRITIC, /* Ethiopic */
|
|
0x01711,
|
|
0x01714 | DIAC_FLAG_DIACRITIC, /* Tagalog */
|
|
0x01731,
|
|
0x01734 | DIAC_FLAG_DIACRITIC, /* Hanunoo */
|
|
0x01751,
|
|
0x01753 | DIAC_FLAG_DIACRITIC, /* Buhid */
|
|
0x01771,
|
|
0x01773 | DIAC_FLAG_DIACRITIC, /* Tagbanwa */
|
|
0x017b5,
|
|
0x017d3 | DIAC_FLAG_DIACRITIC, /* Khmer */
|
|
0x017dc,
|
|
0x017dd | DIAC_FLAG_DIACRITIC, /* Khmer */
|
|
0x018a8,
|
|
0x018a9 | DIAC_FLAG_DIACRITIC, /* Mongolian */
|
|
0x0191f,
|
|
0x0193b | DIAC_FLAG_DIACRITIC, /* Limbu */
|
|
0x019af,
|
|
0x019c0 | DIAC_FLAG_DIACRITIC, /* New Tai Lue */
|
|
0x019c7,
|
|
0x019c9 | DIAC_FLAG_DIACRITIC, /* New Tai Lue */
|
|
0x01a16,
|
|
0x01a1b | DIAC_FLAG_DIACRITIC, /* Buginese */
|
|
0x01a54,
|
|
0x01a7f | DIAC_FLAG_DIACRITIC, /* Tai Tham */
|
|
0x01aff,
|
|
0x01b04 | DIAC_FLAG_DIACRITIC, /* Balinese */
|
|
0x01b33,
|
|
0x01b44 | DIAC_FLAG_DIACRITIC, /* Balinese */
|
|
0x01b6a,
|
|
0x01b73 | DIAC_FLAG_DIACRITIC, /* Balinese */
|
|
0x01b7f,
|
|
0x01b82 | DIAC_FLAG_DIACRITIC, /* Sundanese */
|
|
0x01ba0,
|
|
0x01baa | DIAC_FLAG_DIACRITIC, /* Sundanese */
|
|
0x01c23,
|
|
0x01c37 | DIAC_FLAG_DIACRITIC, /* Lepcha */
|
|
0x01ccf,
|
|
0x01cd2 | DIAC_FLAG_DIACRITIC, /* Vedic Extensions */
|
|
0x01cd3,
|
|
0x01ce8 | DIAC_FLAG_DIACRITIC, /* Vedic Extensions */
|
|
0x01cec,
|
|
0x01ced | DIAC_FLAG_DIACRITIC, /* Vedic Extensions */
|
|
0x01cf1,
|
|
0x01cf2 | DIAC_FLAG_DIACRITIC, /* Vedic Extensions */
|
|
0x01dbf,
|
|
0x01dff | DIAC_FLAG_DIACRITIC, /* Combining Diacritical Marks Supplement */
|
|
0x020cf,
|
|
0x020f0 | DIAC_FLAG_DIACRITIC, /* Combining Diacritical Marks for Symbols */
|
|
0x02cee,
|
|
0x02cf1 | DIAC_FLAG_DIACRITIC, /* Coptic */
|
|
0x02ddf,
|
|
0x02dff | DIAC_FLAG_DIACRITIC, /* Coptic */
|
|
0x03029,
|
|
0x0302f | DIAC_FLAG_DIACRITIC, /* CJK Symbols and Punctuation */
|
|
0x03098,
|
|
0x0309a | DIAC_FLAG_DIACRITIC, /* Hiragana */
|
|
0x0a66e,
|
|
0x0a672 | DIAC_FLAG_DIACRITIC, /* Hiragana */
|
|
0x0a67b,
|
|
0x0a67d | DIAC_FLAG_DIACRITIC, /* Hiragana */
|
|
0x0a6ef,
|
|
0x0a6f1 | DIAC_FLAG_DIACRITIC, /* Bamum */
|
|
0x0a801,
|
|
0x0a802 | DIAC_FLAG_DIACRITIC, /* Syloti Nagri */
|
|
0x0a805,
|
|
0x0a806 | DIAC_FLAG_DIACRITIC, /* Syloti Nagri */
|
|
0x0a80a,
|
|
0x0a80b | DIAC_FLAG_DIACRITIC, /* Syloti Nagri */
|
|
0x0a822,
|
|
0x0a827 | DIAC_FLAG_DIACRITIC, /* Syloti Nagri */
|
|
0x0a87f,
|
|
0x0a881 | DIAC_FLAG_DIACRITIC, /* Saurashtra */
|
|
0x0a8b3,
|
|
0x0a8c4 | DIAC_FLAG_DIACRITIC, /* Saurashtra */
|
|
0x0a8df,
|
|
0x0a8f1 | DIAC_FLAG_DIACRITIC, /* Devanagari Extended */
|
|
0x0a925,
|
|
0x0a92d | DIAC_FLAG_DIACRITIC, /* Kayah Li */
|
|
0x0a946,
|
|
0x0a953 | DIAC_FLAG_DIACRITIC, /* Rejang */
|
|
0x0a97f,
|
|
0x0a983 | DIAC_FLAG_DIACRITIC, /* Javanese */
|
|
0x0a9b2,
|
|
0x0a9c0 | DIAC_FLAG_DIACRITIC, /* Javanese */
|
|
0x0aa28,
|
|
0x0aa36 | DIAC_FLAG_DIACRITIC, /* Cham */
|
|
0x0aa42,
|
|
0x0aa43 | DIAC_FLAG_DIACRITIC, /* Cham */
|
|
0x0aa4b,
|
|
0x0aa4d | DIAC_FLAG_DIACRITIC, /* Cham */
|
|
0x0aa7a,
|
|
0x0aa7b | DIAC_FLAG_DIACRITIC, /* Cham */
|
|
0x0aaaf,
|
|
0x0aab0 | DIAC_FLAG_DIACRITIC, /* Tai Viet */
|
|
0x0aab1,
|
|
0x0aab4 | DIAC_FLAG_DIACRITIC, /* Tai Viet */
|
|
0x0aab6,
|
|
0x0aab8 | DIAC_FLAG_DIACRITIC, /* Tai Viet */
|
|
0x0aabd,
|
|
0x0aabf | DIAC_FLAG_DIACRITIC, /* Tai Viet */
|
|
0x0aac0,
|
|
0x0aac1 | DIAC_FLAG_DIACRITIC, /* Tai Viet */
|
|
0x0abe2,
|
|
0x0abea | DIAC_FLAG_DIACRITIC, /* Meetei Mayek */
|
|
0x0abeb,
|
|
0x0abed | DIAC_FLAG_DIACRITIC, /* Meetei Mayek */
|
|
0x0fb1d,
|
|
0x0fb1e | DIAC_FLAG_DIACRITIC, /* Alphabetic Presentation Forms */
|
|
0x0fe1f,
|
|
0x0fe26 | DIAC_FLAG_DIACRITIC, /* Combining Half Marks */
|
|
0x1107f,
|
|
0x11082 | DIAC_FLAG_DIACRITIC, /* Kaithi */
|
|
0x110af,
|
|
0x110ba | DIAC_FLAG_DIACRITIC, /* Kaithi */
|
|
0x1d164,
|
|
0x1d169 | DIAC_FLAG_DIACRITIC, /* Musical Symbols */
|
|
0x1d16c,
|
|
0x1d182 | DIAC_FLAG_DIACRITIC, /* Musical Symbols */
|
|
0x1d184,
|
|
0x1d18b | DIAC_FLAG_DIACRITIC, /* Musical Symbols */
|
|
0x1d1a9,
|
|
0x1d1ad | DIAC_FLAG_DIACRITIC, /* Musical Symbols */
|
|
0x1d241,
|
|
0x1d244 | DIAC_FLAG_DIACRITIC, /* Ancient Greek Musical Notation */
|
|
0xe01ef,
|
|
};
|
|
|
|
/* Capacity of the most-recently-used cache below */
#define MRU_MAX_LEN 32

/* MRU cache of diac_range[] indices, most recently used first; it is
 * maintained by is_diacritic() */
static unsigned short diacritic_mru[MRU_MAX_LEN];
/* Number of slots of diacritic_mru[] in use (static storage starts at 0) */
static unsigned short mru_len;
|
|
|
|
int is_diacritic(unsigned short char_code, bool *is_rtl)
|
|
{
|
|
unsigned short mru, i;
|
|
|
|
/* Search in MRU */
|
|
for (mru = 0; mru < mru_len; mru++)
|
|
{
|
|
i = diacritic_mru[mru];
|
|
|
|
/* Found in MRU */
|
|
if (DIAC_VAL(i - 1) < char_code && char_code <= DIAC_VAL(i))
|
|
goto Found;
|
|
}
|
|
|
|
/* Search in DB */
|
|
for (i = 1; i < DIAC_NUM_RANGES; i++)
|
|
{
|
|
/* Found */
|
|
if (char_code < DIAC_VAL(i))
|
|
break;
|
|
}
|
|
|
|
/* Add MRU entry, or overwrite LRU if MRU array is full */
|
|
if (mru_len < MRU_MAX_LEN)
|
|
mru_len++;
|
|
else
|
|
mru--;
|
|
|
|
Found:
|
|
/* Promote MRU item to top of MRU */
|
|
for ( ; mru > 0; mru--)
|
|
diacritic_mru[mru] = diacritic_mru[mru - 1];
|
|
diacritic_mru[0] = i;
|
|
|
|
/* Update RTL */
|
|
if (is_rtl)
|
|
*is_rtl = DIAC_IS_RTL(i);
|
|
|
|
return DIAC_IS_DIACRITIC(i);
|
|
}
|