/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2002 Gilles Roux
 *               2003 Garrett Derner
 *               2010 Yoshihisa Uchida
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
#include "plugin.h"
#include "ctype.h"
#include "tv_preferences.h"
#include "tv_text_processor.h"

/* line-ending convention of the text; detected lazily by tv_get_ucs() */
enum tv_text_type {
    TV_TEXT_UNKNOWN,
    TV_TEXT_MAC,
    TV_TEXT_UNIX,
    TV_TEXT_WIN,
};

/* the maximum number of characters in each block */
#ifdef HAVE_LCD_BITMAP
#define TV_MAX_CHARS_PER_BLOCK (LCD_WIDTH / 2 + 1)
#else
#define TV_MAX_CHARS_PER_BLOCK (LCD_WIDTH + 1)
#endif

#define TV_MAX_BLOCKS 5

/*
 * number of spaces used to indent the first line of a paragraph
 * (this value is used only when the line mode is REFLOW)
 */
#define TV_INDENT_SPACES 2

static const struct tv_preferences *prefs;
static enum tv_text_type text_type = TV_TEXT_UNKNOWN;

/* one past the last readable byte of the source buffer being parsed */
static const unsigned char *end_ptr;

/* per-block UCS-2 work buffers, each TV_MAX_CHARS_PER_BLOCK entries long */
static unsigned short *ucsbuf[TV_MAX_BLOCKS];
/* start of the UTF-8 output buffer */
static unsigned char *utf8buf;
/* current write position inside utf8buf */
static unsigned char *outbuf;

static int block_count;
static int block_width;

/*
 * when this value becomes true (line mode EXPAND only),
 * tv_create_formed_text returns a blank line.
 */
static bool expand_extra_line = false;

/* this value is set to true when a line has been divided. */
static bool is_break_line = false;

/* characters after which a line may be broken */
static const unsigned short break_chars[] =
{
    0,
    /* halfwidth characters */
    '\t', '\n', 0x0b, 0x0c, ' ', '!', ',', '-', '.', ':', ';', '?', 0xb7,
    /* fullwidth characters */
    0x2010, /* hyphen */
    0x3000, /* fullwidth space */
    0x3001, /* ideographic comma */
    0x3002, /* ideographic full stop */
    0x30fb, /* katakana middle dot */
    0x30fc, /* katakana-hiragana prolonged sound mark */
    0xff01, /* fullwidth exclamation mark */
    0xff0c, /* fullwidth comma */
    0xff0d, /* fullwidth hyphen-minus */
    0xff0e, /* fullwidth full stop */
    0xff1a, /* fullwidth colon */
    0xff1b, /* fullwidth semicolon */
    0xff1f, /* fullwidth question mark */
};

/* the characters which are not judged as space by isspace() */
static const unsigned short extra_spaces[] = { 0, 0x3000 };

/*
 * Return the display width of the character ch.
 *
 * '\n' has width 0 and the character 0 is measured as ' '.
 * On bitmap LCDs diacritics have width 0 and every other character uses
 * the width reported by the font in prefs->font; otherwise every
 * character has width 1.
 */
static int tv_glyph_width(int ch)
{
    if (ch == '\n')
        return 0;

    if (ch == 0)
        ch = ' ';

#ifdef HAVE_LCD_BITMAP
    /* the width of a diacritic character is 0 */
    if (rb->is_diacritic(ch, NULL))
        return 0;

    return rb->font_get_width(prefs->font, ch);
#else
    return 1;
#endif
}

/*
 * Decode one character starting at str and store it in *ch.
 *
 * Line endings are normalized: the first '\r', "\r\n" or '\n' that is seen
 * fixes text_type, and from then on only the matching sequence yields '\n';
 * a non-matching '\r' or '\n' is converted to ' '.
 *
 * Returns the pointer to the byte following the decoded character.
 * If a two-byte character is truncated by end_ptr, end_ptr is moved back
 * to str, *ch is set to 0 and str itself is returned.
 */
static unsigned char *tv_get_ucs(const unsigned char *str, unsigned short *ch)
{
    int count = 1;
    unsigned char utf8_tmp[3];

    /* distinguish the text_type */
    if (*str == '\r')
    {
        if (text_type == TV_TEXT_WIN || text_type == TV_TEXT_UNKNOWN)
        {
            if (str + 1 < end_ptr && *(str+1) == '\n')
            {
                if (text_type == TV_TEXT_UNKNOWN)
                    text_type = TV_TEXT_WIN;

                *ch = '\n';
                return (unsigned char *)str + 2;
            }

            if (text_type == TV_TEXT_UNKNOWN)
                text_type = TV_TEXT_MAC;
        }
        *ch = (text_type == TV_TEXT_MAC)? '\n' : ' ';
        return (unsigned char *)str + 1;
    }
    else if (*str == '\n')
    {
        if (text_type == TV_TEXT_UNKNOWN)
            text_type = TV_TEXT_UNIX;

        *ch = (text_type == TV_TEXT_UNIX)? '\n' : ' ';
        return (unsigned char *)str + 1;
    }

    if (prefs->encoding == UTF_8)
        return (unsigned char*)rb->utf8decode(str, ch);

#ifdef HAVE_LCD_BITMAP
    /* lead byte of a two-byte character in the selected encoding */
    if ((*str >= 0x80) &&
        ((prefs->encoding > SJIS) ||
         (prefs->encoding == SJIS && (*str <= 0xa0 || *str >= 0xe0))))
    {
        if (str + 1 >= end_ptr)
        {
            /* the trail byte is not in the buffer: stop before this byte */
            end_ptr = str;
            *ch = 0;
            return (unsigned char *)str;
        }
        count = 2;
    }
#endif
    rb->iso_decode(str, utf8_tmp, prefs->encoding, count);
    rb->utf8decode(utf8_tmp, ch);
    return (unsigned char *)str + count;
}

/*
 * Append count characters of ucs to the output buffer as UTF-8.
 * outbuf is advanced past the encoded text and a '\0' is written at the
 * new position (outbuf is left pointing at that terminator).
 */
static void tv_decode2utf8(const unsigned short *ucs, int count)
{
    int i;

    for (i = 0; i < count; i++)
        outbuf = rb->utf8encode(ucs[i], outbuf);

    *outbuf = '\0';
}

/*
 * Return true if a line may be broken after the character ch,
 * i.e. ch is listed in break_chars and the word mode is not CHOP.
 */
static bool tv_is_line_break_char(unsigned short ch)
{
    size_t i;

    /* when the word mode is CHOP, no character breaks the line. */
    if (prefs->word_mode == CHOP)
        return false;

    /*
     * iterate over the element count; sizeof(break_chars) alone is the
     * size in bytes and would read past the end of the table.
     */
    for (i = 0; i < sizeof(break_chars) / sizeof(break_chars[0]); i++)
    {
        if (break_chars[i] == ch)
            return true;
    }
    return false;
}

/*
 * Return true if ch is a space: either an ASCII character accepted by
 * isspace() or one of the characters listed in extra_spaces.
 */
static bool tv_isspace(unsigned short ch)
{
    size_t i;

    if (ch < 128 && isspace(ch))
        return true;

    /* element count, not byte size (see tv_is_line_break_char) */
    for (i = 0; i < sizeof(extra_spaces) / sizeof(extra_spaces[0]); i++)
    {
        if (extra_spaces[i] == ch)
            return true;
    }
    return false;
}

/*
 * Used in JOIN line mode: decode the character at next_str and
 * return true if it is a space.
 */
static bool tv_is_break_line_join_mode(const unsigned char *next_str)
{
    unsigned short ch;

    tv_get_ucs(next_str, &ch);
    return tv_isspace(ch);
}

/*
 * Spread the words of one line by inserting additional spaces between
 * them, based on block_width and the width of ' '.
 *
 * ucs   : the line; rewritten in place
 * chars : the number of characters in ucs
 *
 * Returns the new number of characters of the line. The line is returned
 * unchanged (except for trailing spaces dropped when the alignment is LEFT)
 * if it contains no space after the leading indent.
 */
static int tv_form_reflow_line(unsigned short *ucs, int chars)
{
    unsigned short new_ucs[TV_MAX_CHARS_PER_BLOCK];
    unsigned short *p = new_ucs;
    unsigned short ch;
    int i;
    int k;
    int expand_spaces;
    int indent_chars = 0;
    int nonspace_chars = 0;
    int nonspace_width = 0;
    int remain_spaces;
    int spaces = 0;
    int words_spaces;

    if (prefs->alignment == LEFT)
    {
        while (chars > 0 && ucs[chars-1] == ' ')
            chars--;
    }

    if (chars == 0)
        return 0;

    /* count the leading spaces, never looking beyond the end of the line */
    while (indent_chars < chars && ucs[indent_chars] == ' ')
        indent_chars++;

    for (i = indent_chars; i < chars; i++)
    {
        ch = ucs[i];
        if (ch == ' ')
            spaces++;
        else
        {
            nonspace_chars++;
            nonspace_width += tv_glyph_width(ch);
        }
    }

    if (spaces == 0)
        return chars;

    /* the number of spaces that fit beside the non-space characters */
    expand_spaces = (block_width - nonspace_width) / tv_glyph_width(' ')
                    - indent_chars;
    if (indent_chars + nonspace_chars + expand_spaces > TV_MAX_CHARS_PER_BLOCK)
        expand_spaces = TV_MAX_CHARS_PER_BLOCK - indent_chars - nonspace_chars;

    /* every gap gets words_spaces spaces; the first remain_spaces gaps get one more */
    words_spaces = expand_spaces / spaces;
    remain_spaces = expand_spaces - words_spaces * spaces;

    for (i = 0; i < indent_chars; i++)
        *p++ = ' ';

    for ( ; i < chars; i++)
    {
        ch = ucs[i];
        *p++ = ch;
        if (ch == ' ')
        {
            for (k = ((remain_spaces > 0)? 0 : 1); k < words_spaces; k++)
                *p++ = ch;

            remain_spaces--;
        }
    }

    rb->memcpy(ucs, new_ucs, sizeof(unsigned short) * TV_MAX_CHARS_PER_BLOCK);
    return indent_chars + nonspace_chars + expand_spaces;
}

/*
 * Right alignment: working from the last block towards the first, move
 * characters from the tail of the preceding block(s) to the head of the
 * current block to fill its free width.
 *
 * block_chars : the number of characters of each block in ucsbuf;
 *               updated to reflect the moved characters
 */
static void tv_align_right(int *block_chars)
{
    unsigned short *cur_text;
    unsigned short *prev_text;
    unsigned short ch;
    int cur_block = block_count - 1;
    int prev_block;
    int cur_chars;
    int prev_chars;
    int idx;
    int break_pos;
    int break_width = 0;
    int append_width;
    int width;

    while (cur_block > 0)
    {
        cur_text  = ucsbuf[cur_block];
        cur_chars = block_chars[cur_block];
        idx = cur_chars;
        width = 0;
        while(--idx >= 0)
            width += tv_glyph_width(cur_text[idx]);

        /* the free width that is left in the current block */
        width = block_width - width;
        prev_block = cur_block - 1;

        do {
            prev_text  = ucsbuf[prev_block];
            prev_chars = block_chars[prev_block];

            idx = prev_chars;
            append_width = 0;
            break_pos = prev_chars;
            /* scan the previous block backwards until the free width is used up */
            while (append_width < width && idx > 0)
            {
                ch = prev_text[--idx];
                if (tv_is_line_break_char(ch))
                {
                    break_pos = idx + 1;
                    break_width = append_width;
                }
                append_width += tv_glyph_width(ch);
            }
            if (append_width > width)
                idx++;

            if (idx == 0)
            {
                break_pos = 0;
                break_width = append_width;
            }

            if (break_pos < prev_chars)
                append_width = break_width;
            /* the case of
             * (1) when the first character of the cur_text concatenates
             *     the last character of the prev_text.
             * (2) the length of ucsbuf[block] is short (< 0.75 * block width)
             */
            else if (((!tv_isspace(*cur_text) && !tv_isspace(prev_text[prev_chars - 1])) ||
                      (4 * width >= 3 * block_width)))
            {
                break_pos = idx;
            }

            if (break_pos < prev_chars)
            {
                /* make room at the head of cur_text, then copy the tail of prev_text */
                rb->memmove(cur_text + prev_chars - break_pos,
                            cur_text, block_chars[cur_block] * sizeof(unsigned short));
                rb->memcpy(cur_text, prev_text + break_pos,
                           (prev_chars - break_pos) * sizeof(unsigned short));

                block_chars[prev_block] = break_pos;
                block_chars[cur_block ] += prev_chars - break_pos;
            }
        } while ((width -= append_width) > 0 && --prev_block >= 0);
        cur_block--;
    }
}

/*
 * Decode the text at src into ucs until one block line is filled or the
 * line ends, and update is_break_line accordingly.
 *
 * src       : the text to parse (readable up to end_ptr)
 * ucs       : receives the decoded characters of the line
 * ucs_chars : receives the number of characters of the line
 * is_indent : true while the leading spaces of the line are being parsed
 *
 * Returns the number of bytes of src that belong to the line.
 */
static int tv_parse_text(const unsigned char *src, unsigned short *ucs,
                         int *ucs_chars, bool is_indent)
{
    const unsigned char *cur = src;
    const unsigned char *next = src;
    const unsigned char *line_break_ptr = NULL;
    const unsigned char *line_end_ptr = NULL;
    unsigned short ch = 0;
    unsigned short prev_ch;
    int chars = 0;
    int gw;
    int i;
    int line_break_width = 0;
    int line_end_chars = 0;
    int width = 0;
    bool is_space = false;

    while (true) {
        cur = next;
        if (cur >= end_ptr)
        {
            /* the buffer is exhausted */
            line_end_ptr = cur;
            line_end_chars = chars;
            is_break_line = true;
            break;
        }

        prev_ch = ch;
        next = tv_get_ucs(cur, &ch);
        if (ch == '\n')
        {
            if (prefs->line_mode != JOIN || tv_is_break_line_join_mode(next))
            {
                line_end_ptr = next;
                line_end_chars = chars;
                is_break_line = false;
                break;
            }

            if (prefs->word_mode == CHOP || tv_isspace(prev_ch))
                continue;

            /*
             * when the line mode is JOIN and the word mode is WRAP,
             * the next character does not concatenate with the
             * previous character.
             */
            ch = ' ';
        }
        else if ((is_space = tv_isspace(ch)) == true)
        {
            /*
             * when the line mode is REFLOW:
             * (1) a spacelike character is converted to ' '
             * (2) consecutive spaces are collapsed into one
             */
            if (prefs->line_mode == REFLOW)
            {
                ch = ' ';
                if (prev_ch == ch)
                    continue;
            }

            /* when the alignment is RIGHT, indent spaces are ignored. */
            if (prefs->alignment == RIGHT && is_indent)
                continue;
        }
        else
            is_indent = false;

        if (prefs->line_mode == REFLOW && is_indent)
            gw = tv_glyph_width(ch) * TV_INDENT_SPACES;
        else
            gw = tv_glyph_width(ch);

        width += gw;
        if (width > block_width)
        {
            /* the character does not fit into the block */
            width -= gw;
            if (is_space)
            {
                line_end_ptr = cur;
                line_end_chars = chars;
            }
            is_break_line = true;
            break;
        }

        if (prefs->line_mode == REFLOW && is_indent)
        {
            for (i = 1; i < TV_INDENT_SPACES; i++)
                ucs[chars++] = ch;
        }
        ucs[chars++] = ch;

        if (tv_is_line_break_char(ch))
        {
            /* remember the last position where the line may be broken */
            line_break_ptr = next;
            line_break_width = width;
            line_end_chars = chars;
        }
        if (chars >= TV_MAX_CHARS_PER_BLOCK)
        {
            is_break_line = true;
            break;
        }
    }

    /* set the end position and character count */
    if (line_end_ptr == NULL)
    {
        /*
         * when the last line break position is too short (line length < 0.75 * block width),
         * the line is cut off at the position where it is closest to the displayed width.
         */
        if ((prefs->line_mode == REFLOW && line_break_ptr == NULL) ||
            (4 * line_break_width < 3 * block_width))
        {
            line_end_ptr = cur;
            line_end_chars = chars;
        }
        else
            line_end_ptr = line_break_ptr;
    }

    *ucs_chars = line_end_chars;
    return line_end_ptr - src;
}

/*
 * Form one display line (up to block_count blocks) from the text at src.
 *
 * src      : the text to parse
 * bufsize  : the number of readable bytes at src
 * block    : the index of the block whose text is written to the output
 * is_multi : if true, the text of block + 1 is written to the output too
 * dst      : if not NULL, receives the pointer to the UTF-8 output text;
 *            if NULL, the text is only parsed and nothing is output
 *
 * Returns the number of bytes of src consumed by the line.
 * In EXPAND line mode every other call returns 0 with an empty output
 * (the extra blank line).
 */
int tv_create_formed_text(const unsigned char *src, ssize_t bufsize,
                          int block, bool is_multi, const unsigned char **dst)
{
    unsigned short ch;
    int chars[block_count];
    int i;
    int size = 0;
    bool is_indent;

    outbuf = utf8buf;
    *outbuf = '\0';

    for (i = 0; i < block_count; i++)
        chars[i] = 0;

    if (dst != NULL)
        *dst = utf8buf;

    if (prefs->line_mode == EXPAND && (expand_extra_line = !expand_extra_line) == true)
        return 0;

    end_ptr = src + bufsize;

    /* the line is indented if it starts with a space and does not continue a divided line */
    tv_get_ucs(src, &ch);
    is_indent = (tv_isspace(ch) && !is_break_line);

    for (i = 0; i < block_count; i++)
    {
        size += tv_parse_text(src + size, ucsbuf[i], &chars[i], is_indent);
        if (!is_break_line)
            break;

        is_indent = false;
    }

    if (dst != NULL)
    {
        if (prefs->alignment == RIGHT)
            tv_align_right(chars);

        for (i = 0; i < block_count; i++)
        {
            if (i == block || (is_multi && i == block + 1))
            {
                if (is_break_line && prefs->line_mode == REFLOW)
                    chars[i] = tv_form_reflow_line(ucsbuf[i], chars[i]);

                tv_decode2utf8(ucsbuf[i], chars[i]);
            }
        }
    }

    return size;
}

/*
 * Initialize the text processor.
 *
 * buf       : the work buffer; it is divided into TV_MAX_BLOCKS UCS buffers
 *             followed by the UTF-8 output buffer
 * bufsize   : the size of buf in bytes
 * used_size : receives the number of bytes of buf that are required
 *
 * Returns false if bufsize is smaller than the required size.
 */
bool tv_init_text_processor(unsigned char *buf, size_t bufsize, size_t *used_size)
{
    int i;

    *used_size = TV_MAX_CHARS_PER_BLOCK * (2 * 3 + TV_MAX_BLOCKS * sizeof(unsigned short));
    if (bufsize < *used_size)
        return false;

    prefs = tv_get_preferences();
    text_type = TV_TEXT_UNKNOWN;
    expand_extra_line = false;
    is_break_line = false;

    ucsbuf[0] = (unsigned short*)buf;
    for (i = 1; i < TV_MAX_BLOCKS; i++)
        ucsbuf[i] = ucsbuf[i - 1] + TV_MAX_CHARS_PER_BLOCK;

    utf8buf = buf + TV_MAX_CHARS_PER_BLOCK * TV_MAX_BLOCKS * sizeof(unsigned short);

    return true;
}

/*
 * Set the number of blocks per line and the width of each block that
 * are used by tv_create_formed_text.
 */
void tv_set_creation_conditions(int blocks, int width)
{
    block_count = blocks;
    block_width = width;
}