/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/ /
 *   Jukebox    |    |   (  <_> )  \___|    <  | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2004 Jörg Hohensohn
 *
 * This module collects the Talkbox and voice UI functions.
 * (Talkbox reads directory names from mp3 clips called thumbnails,
 *  the voice UI lets menus and screens "talk" from a voicefile in memory.)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

#include <stdio.h>
#include <stddef.h>
#include "string-extra.h"
#include "file.h"
#include "system.h"
#include "kernel.h"
#include "settings.h"
#include "settings_list.h"
#include "splash.h"
#if CONFIG_CODEC == SWCODEC
#include "voice_thread.h"
#else
#include "mp3_playback.h"
#endif
#include "audio.h"
#include "lang.h"
#include "talk.h"
#include "metadata.h"
/*#define LOGF_ENABLE*/
#include "logf.h"
#include "bitswap.h"
#include "structec.h"
#include "plugin.h" /* plugin_get_buffer() */
#include "debug.h"
#include "panic.h"
#include "misc.h" /* time_split_units() */

/* Memory layout varies between targets because the
   Archos (MASCODEC) devices cannot mix voice and audio playback

              MASCODEC  | MASCODEC  | SWCODEC
              (playing) | (stopped) |
    voicebuf-----------+-----------+------------
              audio    | voice     | voice
                       |-----------|------------
                       | thumbnail | thumbnail
                       |           |------------
                       |           | filebuf
                       |           |------------
                       |           | audio
 voicebufend----------+-----------+------------

   SWCODEC allocates dedicated buffers (except voice and thumbnail are
   together in the talkbuf), MASCODEC reuses audiobuf. */


/***************** Constants *****************/

#define QUEUE_SIZE 64 /* must be a power of two */
#define QUEUE_MASK (QUEUE_SIZE-1)
const char* const dir_thumbnail_name = "_dirname.talk";
const char* const file_thumbnail_ext = ".talk";

/***************** Functional Macros *****************/

#define QUEUE_LEVEL ((queue_write - queue_read) & QUEUE_MASK)

#define LOADED_MASK 0x80000000 /* MSB */

#define DEFAULT_VOICE_LANG "english"

/***************** Data types *****************/

struct clip_entry /* one entry of the index table */
{
    int offset; /* offset from start of voicefile file */
    int size;   /* size of the clip */
};

struct voicefile_header /* file format of our voice file */
{
    int version;   /* version of the voicefile */
    int target_id; /* the rockbox target the file was made for */
    int table;     /* offset to index table, (=header size) */
    int id1_max;   /* number of "normal" clips contained in above index */
    int id2_max;   /* number of "voice only" clips contained in above index */
    /* The header is followed by the index tables (n*struct clip_entry),
     * which is followed by the mp3/speex encoded clip data */
};
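
/* On-disk layout of a .voice file, as implied by the structures above:
 *
 *   offset 0                 : struct voicefile_header (five int fields)
 *   offset header.table      : (id1_max + id2_max) * struct clip_entry
 *   offset clip_entry.offset : encoded clip data, one blob per clip
 *
 * A clip is located by seeking to index[i].offset (relative to the start of
 * the file) and reading index[i].size bytes; a size of 0 means the clip is
 * not present in this voice file. */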
/***************** Globals *****************/

#if (CONFIG_CODEC == SWCODEC && MEMORYSIZE <= 2) || defined(ONDIO_SERIES)
/* On low memory swcodec targets the entire voice file wouldn't fit in memory
 * together with codecs, so we load clips each time they are accessed.
 * The Ondios have slow storage access and loading the entire voice file would
 * take several seconds, so we use the same mechanism. */
#define TALK_PROGRESSIVE_LOAD
#if !defined(ONDIO_SERIES)
/* 70+ clips should fit into 100k */
#define MAX_CLIP_BUFFER_SIZE (100000)
#endif
#endif

#ifndef MAX_CLIP_BUFFER_SIZE
/* 1GB should be enough for everybody; will be capped to voicefile size */
#define MAX_CLIP_BUFFER_SIZE (1<<30)
#endif

#define THUMBNAIL_RESERVE (50000)

/* Multiple thumbnails can be loaded back-to-back in this buffer. */
static volatile int thumbnail_buf_used SHAREDBSS_ATTR; /* length of data in thumbnail buffer */
static struct voicefile_header voicefile; /* loaded voicefile */
static bool has_voicefile; /* a voicefile file is present */
static bool need_shutup; /* is there possibly any voice playing to be shutup */
static bool force_enqueue_next; /* enqueue next utterance even if enqueue is false */
static int queue_write; /* write index of queue, by application */
static int queue_read; /* read index of queue, by ISR context */
static enum talk_status talk_status = TALK_STATUS_OK;
#if CONFIG_CODEC == SWCODEC
/* protects queue_read, queue_write and thumbnail_buf_used */
static struct mutex queue_mutex SHAREDBSS_ATTR;
#define talk_queue_lock() ({ mutex_lock(&queue_mutex); })
#define talk_queue_unlock() ({ mutex_unlock(&queue_mutex); })
#else
#define talk_queue_lock() ({ })
#define talk_queue_unlock() ({ })
#endif /* CONFIG_CODEC */
static int sent; /* how many bytes handed over to playback, owned by ISR */
static unsigned char curr_hd[3]; /* current frame header, for re-sync */
static unsigned char last_lang[MAX_FILENAME+1]; /* name of last used lang file (in talk_init) */
static bool talk_initialized; /* true if talk_init has been called */
static bool give_buffer_away; /* true if we should give the buffers away in shrink_callback if requested */
static int talk_temp_disable_count; /* if positive, temporarily disable voice UI (not saved) */
/* size of the voice data in the voice file and the actually allocated buffer
 * for it. voicebuf_size is always smaller or equal to voicefile_size */
static unsigned long voicefile_size, voicebuf_size;

struct queue_entry /* one entry of the internal queue */
{
    int handle;    /* buflib handle to the clip data */
    int length;    /* total length of the clip */
    int remaining; /* bytes that still need to be decoded */
};

static struct buflib_context clip_ctx;

struct clip_cache_metadata {
    long tick;
    int handle, voice_id;
};

static int metadata_table_handle;
static unsigned max_clips;
static int cache_hits, cache_misses;
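
/* Loaded clips are kept in a small cache inside the clip buffer (clip_ctx is
 * a secondary buflib arena living inside the core "voice data" allocation).
 * Each cached clip has one clip_cache_metadata slot recording its buflib
 * handle, the voice ID it belongs to and the tick of its last use.  When no
 * free slot (or no free space) is left, free_oldest_clip() evicts thumbnail
 * clips first, then the least recently used regular clip; the silence clip
 * (VOICE_PAUSE) is deliberately skipped, since it is replayed whenever the
 * queue runs dry.  cache_hits/cache_misses only feed the debug screen. */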
static struct queue_entry queue[QUEUE_SIZE]; /* queue of scheduled clips */
static struct queue_entry silence, *last_clip;

/***************** Private implementation *****************/

static int index_handle, talk_handle;

static int move_callback(int handle, void *current, void *new)
{
    (void)current;
    if (handle == talk_handle && !buflib_context_relocate(&clip_ctx, new))
        return BUFLIB_CB_CANNOT_MOVE;
    return BUFLIB_CB_OK;
}

static struct mutex read_buffer_mutex;

/* on HWCODEC only voice xor audio can be active at a time */
static bool check_audio_status(void)
{
#if CONFIG_CODEC != SWCODEC
    if (audio_status()) /* busy, buffer in use */
        return false;
    /* ensure playback is given up on the buffer */
    audio_hard_stop();
#endif
    return true;
}

/* ISR (mp3_callback()) must not run during moving of the clip buffer,
 * because the MAS may get out-of-sync */
static void sync_callback(int handle, bool sync_on)
{
    (void) handle;
    if (sync_on)
        mutex_lock(&read_buffer_mutex);
    else
        mutex_unlock(&read_buffer_mutex);
#if CONFIG_CPU == SH7034
    /* DMA must not interrupt during buffer move or commit_buffer copies
     * from inconsistent buflib buffer */
    if (sync_on)
        CHCR3 &= ~0x0001; /* disable the DMA (and therefore the interrupt also) */
    else
        CHCR3 |= 0x0001; /* re-enable the DMA */
#endif
}

static ssize_t read_to_handle_ex(int fd, struct buflib_context *ctx, int handle,
                                 int handle_offset, size_t count)
{
    unsigned char *buf;
    ssize_t ret;

    mutex_lock(&read_buffer_mutex);

    if (!ctx)
        buf = core_get_data(handle);
    else
        buf = buflib_get_data(ctx, handle);

    buf += handle_offset;
    ret = read(fd, buf, count);

    mutex_unlock(&read_buffer_mutex);

    return ret;
}

static ssize_t read_to_handle(int fd, int handle, int handle_offset, size_t count)
{
    return read_to_handle_ex(fd, NULL, handle, handle_offset, count);
}

static int shrink_callback(int handle, unsigned hints, void *start, size_t old_size)
{
    (void)start;(void)old_size;
    int *h;
#if (MAX_CLIP_BUFFER_SIZE < (MEMORYSIZE<<20) || (MEMORYSIZE > 2))
    /* on low-mem and when the voice buffer size is not limited (i.e.
     * on 2MB HWCODEC) we effectively own the entire buffer because
     * the voicefile takes up all RAM. This blocks other Rockbox parts
     * from allocating, especially during bootup. Therefore always give
     * up the buffer and reload when clips are played back. On high-mem
     * or when the clip buffer is limited to a few 100K this provision is
     * not necessary.
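     * (Judging from the check below, a shrink request whose position hints
     * set all of BUFLIB_SHRINK_POS_MASK is taken to mean "the allocation may
     * be discarded entirely": the clip buffer, and when the index allocation
     * itself is being shrunk also the index, are freed here and reloaded from
     * disk on the next talk request.)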
*/ if (give_buffer_away && (hints & BUFLIB_SHRINK_POS_MASK) == BUFLIB_SHRINK_POS_MASK) #else (void)hints; #endif { if (handle == talk_handle) h = &talk_handle; else if (handle == index_handle) h = &index_handle; else h = NULL; mutex_lock(&read_buffer_mutex); /* the clip buffer isn't usable without index table */ if (handle == index_handle && talk_handle > 0) talk_handle = core_free(talk_handle); if (h) *h = core_free(handle); mutex_unlock(&read_buffer_mutex); return BUFLIB_CB_OK; } return BUFLIB_CB_CANNOT_SHRINK; } static struct buflib_callbacks talk_ops = { .move_callback = move_callback, .sync_callback = sync_callback, .shrink_callback = shrink_callback, }; static int open_voicefile(void) { char buf[64]; char* p_lang = DEFAULT_VOICE_LANG; /* default */ if ( global_settings.lang_file[0] && global_settings.lang_file[0] != 0xff ) { /* try to open the voice file of the selected language */ p_lang = (char *)global_settings.lang_file; } snprintf(buf, sizeof(buf), LANG_DIR "/%s.voice", p_lang); return open(buf, O_RDONLY); } static int id2index(int id) { int index = id; if (id > VOICEONLY_DELIMITER) { /* voice-only entries use the second part of the table. The first string comes after VOICEONLY_DELIMITER so we need to substract VOICEONLY_DELIMITER + 1 */ index -= VOICEONLY_DELIMITER + 1; if (index >= voicefile.id2_max) return -1; /* must be newer than we have */ index += voicefile.id1_max; /* table 2 is behind table 1 */ } else { /* normal use of the first table */ if (id >= voicefile.id1_max) return -1; /* must be newer than we have */ } return index; } #ifndef TALK_PROGRESSIVE_LOAD static int index2id(int index) { int id = index; if (index >= voicefile.id2_max + voicefile.id1_max) return -1; if (index >= voicefile.id1_max) { /* must be voice-only if it exceeds table 1 */ id -= voicefile.id1_max; /* The first string comes after VOICEONLY_DELIMITER so we need to add VOICEONLY_DELIMITER + 1 */ id += VOICEONLY_DELIMITER + 1; } return id; } #endif static int free_oldest_clip(void) { unsigned i; int oldest = 0; bool thumb = false; long age, now; struct clip_entry* clipbuf; struct clip_cache_metadata *cc = buflib_get_data(&clip_ctx, metadata_table_handle); for(age = i = 0, now = current_tick; i < max_clips; i++) { if (cc[i].handle) { if (thumb && cc[i].voice_id == VOICEONLY_DELIMITER && (now - cc[i].tick) > age) { /* thumb clips are freed first */ age = now - cc[i].tick; oldest = i; } else if (!thumb) { if (cc[i].voice_id == VOICEONLY_DELIMITER) { age = now - cc[i].tick; oldest = i; thumb = true; } else if ((now - cc[i].tick) > age && cc[i].voice_id != VOICE_PAUSE) { /* find the last-used clip but never consider silence */ age = now - cc[i].tick; oldest = i; } } } } /* free the last one if no oldest one could be determined */ cc = &cc[oldest]; cc->handle = buflib_free(&clip_ctx, cc->handle); /* need to clear the LOADED bit too (not for thumb clips) */ if (cc->voice_id != VOICEONLY_DELIMITER) { clipbuf = core_get_data(index_handle); clipbuf[id2index(cc->voice_id)].size &= ~LOADED_MASK; } return oldest; } /* common code for load_initial_clips() and get_clip() */ static void add_cache_entry(int clip_handle, int table_index, int id) { unsigned i; struct clip_cache_metadata *cc = buflib_get_data(&clip_ctx, metadata_table_handle); if (table_index != -1) { /* explicit slot; use that */ cc = &cc[table_index]; if (cc->handle > 0) panicf("%s(): Slot already used", __func__); } else { /* find an empty slot */ for(i = 0; cc[i].handle && i < max_clips; i++) ; if (i == max_clips) /* no free slot in the cache 
table? */ i = free_oldest_clip(); cc = &cc[i]; } cc->handle = clip_handle; cc->tick = current_tick; cc->voice_id = id; } static ssize_t read_clip_data(int fd, int index, int clip_handle) { struct clip_entry* clipbuf; size_t clipsize; ssize_t ret; if (fd < 0) { buflib_free(&clip_ctx, clip_handle); return -1; /* open error */ } clipbuf = core_get_data(index_handle); /* this must not be called with LOADED_MASK set in clipsize */ clipsize = clipbuf[index].size; lseek(fd, clipbuf[index].offset, SEEK_SET); ret = read_to_handle_ex(fd, &clip_ctx, clip_handle, 0, clipsize); if (ret < 0 || clipsize != (size_t)ret) { buflib_free(&clip_ctx, clip_handle); return -2; /* read error */ } clipbuf = core_get_data(index_handle); clipbuf[index].size |= LOADED_MASK; /* mark as loaded */ return ret; } static void load_initial_clips(int fd) { #if defined(TALK_PROGRESSIVE_LOAD) (void) fd; #else unsigned index, i; unsigned num_clips = voicefile.id1_max + voicefile.id2_max; for(index = i = 0; index < num_clips && i < max_clips; index++) { int handle; struct clip_entry* clipbuf = core_get_data(index_handle); size_t clipsize = clipbuf[index].size; ssize_t ret; if (clipsize == 0) /* clip not included in voicefile */ continue; handle = buflib_alloc(&clip_ctx, clipsize); if (handle < 0) break; ret = read_clip_data(fd, index, handle); if (ret < 0) break; add_cache_entry(handle, i++, index2id(index)); } #endif } /* fetch a clip from the voice file */ static int get_clip(long id, struct queue_entry *q) { int index; int retval = -1; struct clip_entry* clipbuf; size_t clipsize; index = id2index(id); if (index == -1) return -1; clipbuf = core_get_data(index_handle); clipsize = clipbuf[index].size; if (clipsize == 0) /* clip not included in voicefile */ return -1; if (!(clipsize & LOADED_MASK)) { /* clip needs loading */ int fd, handle, oldest = -1; ssize_t ret; cache_misses++; /* free clips from cache until this one succeeds to allocate */ while ((handle = buflib_alloc(&clip_ctx, clipsize)) < 0) oldest = free_oldest_clip(); /* handle should now hold a valid alloc. Load from disk * and insert into cache */ fd = open_voicefile(); ret = read_clip_data(fd, index, handle); close(fd); if (ret < 0) return ret; /* finally insert into metadata table */ add_cache_entry(handle, oldest, id); retval = handle; } else { /* clip is in memory already; find where it was loaded */ cache_hits++; struct clip_cache_metadata *cc; static int i; cc = buflib_get_data(&clip_ctx, metadata_table_handle); for (i = 0; cc[i].voice_id != id || !cc[i].handle; i++) ; cc[i].tick = current_tick; /* reset age */ clipsize &= ~LOADED_MASK; /* without the extra bit gives true size */ retval = cc[i].handle; } q->handle = retval; q->length = clipsize; q->remaining = clipsize; return 0; } static bool load_index_table(int fd, const struct voicefile_header *hdr) { ssize_t ret; struct clip_entry *buf; if (index_handle > 0) /* nothing to do? 
*/ return true; ssize_t alloc_size = (hdr->id1_max + hdr->id2_max) * sizeof(struct clip_entry); index_handle = core_alloc_ex("voice index", alloc_size, &talk_ops); if (index_handle < 0) return false; ret = read_to_handle(fd, index_handle, 0, alloc_size); if (ret == alloc_size) { buf = core_get_data(index_handle); for (int i = 0; i < hdr->id1_max + hdr->id2_max; i++) { #ifdef ROCKBOX_LITTLE_ENDIAN /* doesn't yield() */ structec_convert(&buf[i], "ll", 1, true); #endif } } else index_handle = core_free(index_handle); return ret == alloc_size; } static bool load_header(int fd, struct voicefile_header *hdr) { ssize_t got_size = read(fd, hdr, sizeof(*hdr)); if (got_size != sizeof(*hdr)) return false; #ifdef ROCKBOX_LITTLE_ENDIAN logf("Byte swapping voice file"); structec_convert(&voicefile, "lllll", 1, true); #endif return true; } static bool create_clip_buffer(size_t max_size) { size_t alloc_size; /* just allocate, populate on an as-needed basis later */ talk_handle = core_alloc_ex("voice data", max_size, &talk_ops); if (talk_handle < 0) goto alloc_err; buflib_init(&clip_ctx, core_get_data(talk_handle), max_size); /* the first alloc is the clip metadata table */ alloc_size = max_clips * sizeof(struct clip_cache_metadata); metadata_table_handle = buflib_alloc(&clip_ctx, alloc_size); memset(buflib_get_data(&clip_ctx, metadata_table_handle), 0, alloc_size); return true; alloc_err: talk_status = TALK_STATUS_ERR_ALLOC; index_handle = core_free(index_handle); return false; } /* load the voice file into the mp3 buffer */ static bool load_voicefile_index(int fd) { if (fd < 0) /* failed to open */ { talk_status = TALK_STATUS_ERR_NOFILE; return false; } /* load the header first */ if (!load_header(fd, &voicefile)) { talk_status = TALK_STATUS_ERR_INCOMPATIBLE; return false; } /* format check */ if (voicefile.table == sizeof(struct voicefile_header)) { if (voicefile.version == VOICE_VERSION && voicefile.target_id == TARGET_ID && voicefile.id1_max == TALK_FINAL_ID && voicefile.id2_max == TALK_FINAL_ID_VOICEONLY - VOICEONLY_DELIMITER) { if (load_index_table(fd, &voicefile)) return true; } } talk_status = TALK_STATUS_ERR_INCOMPATIBLE; logf("Incompatible voice file"); logf("version %d expected %d", voicefile.version, VOICE_VERSION); logf("target_id %d expected %d", voicefile.target_id, TARGET_ID); logf("id1_max %d expected %d", voicefile.id1_max, TALK_FINAL_ID); logf("id2_max %d expected %d", voicefile.id2_max, TALK_FINAL_ID_VOICEONLY - VOICEONLY_DELIMITER); return false; } /* this function caps the voicefile buffer and allocates it. It can * be called after talk_init(), e.g. when the voice was temporarily disabled. * The buffer size has to be capped again each time because the available * audio buffer changes over time */ static bool load_voicefile_data(int fd) { voicebuf_size = voicefile_size; /* cap to the max. number of clips or the size of the available audio * buffer which we grab. We leave some to the rest of the system. * While that reduces our buffer size it improves the chance that * other allocs succeed without disabling voice which would require * reloading the voice from disk (as we do not shrink our buffer when * other code attempts new allocs these would fail) */ ssize_t cap = MIN(MAX_CLIP_BUFFER_SIZE, audio_buffer_available() - (64<<10)); if (UNLIKELY(cap < 0)) { logf("Not enough memory for voice. 
Disabling...\n"); talk_status = TALK_STATUS_ERR_OOM; return false; } else if (voicebuf_size > (size_t)cap) voicebuf_size = cap; /* just allocate, populate on an as-needed basis later * re-create the clip buffer to ensure clip_ctx is up-to-date */ if (talk_handle > 0) talk_handle = core_free(talk_handle); if (!create_clip_buffer(voicebuf_size)) return false; load_initial_clips(fd); /* make sure to have the silence clip, if available return value can * be cached globally even for TALK_PROGRESSIVE_LOAD because the * VOICE_PAUSE clip is specially handled */ get_clip(VOICE_PAUSE, &silence); return true; } /* Use a static buffer to avoid difficulties with buflib during DMA * (hwcodec)/buffer passing to the voice_thread (swcodec). Clips * can be played in chunks so the size is not that important */ static unsigned char commit_buffer[2<<10]; static void* commit_transfer(struct queue_entry *qe, size_t *size) { void *buf = NULL; /* shut up gcc */ static unsigned char *bufpos = commit_buffer; #if CONFIG_CODEC != SWCODEC sent = MIN(qe->remaining, 0xFFFF); #else sent = qe->remaining; #endif sent = MIN((size_t)sent, sizeof(commit_buffer)); buf = buflib_get_data(&clip_ctx, qe->handle); /* adjust buffer position to what has been played already */ buf += (qe->length - qe->remaining); memcpy(bufpos, buf, sent); *size = sent; return commit_buffer; } static inline bool is_silence(struct queue_entry *qe) { if (silence.handle > 0) /* silence clip available? */ return (qe->handle == silence.handle); else return false; } /* called in ISR context (on HWCODEC) if mp3 data got consumed */ static void mp3_callback(const void** start, size_t* size) { struct queue_entry *qe = &queue[queue_read]; #if CONFIG_CODEC == SWCODEC /* voice_thread.c hints us how many of the buffer we provided it actually * consumed. Because buffers have to be frame-aligned for speex * it might be less than what we presented */ if (*size) sent = *size; #endif qe->remaining -= sent; /* we completed this */ if (qe->remaining > 0) /* current clip not finished? */ { /* feed the next 64K-1 chunk */ *start = commit_transfer(qe, size); return; } talk_queue_lock(); /* increment read position for the just played clip */ queue_read = (queue_read + 1) & QUEUE_MASK; if (QUEUE_LEVEL == 0) { if (!is_silence(last_clip)) { /* add silence clip when queue runs empty playing a voice clip, * only if the previous clip wasn't already silence */ queue[queue_write] = silence; queue_write = (queue_write + 1) & QUEUE_MASK; } else { *size = 0; /* end of data */ } } if (QUEUE_LEVEL != 0) /* queue is not empty? */ { /* start next clip */ last_clip = &queue[queue_read]; *start = commit_transfer(last_clip, size); curr_hd[0] = commit_buffer[1]; curr_hd[1] = commit_buffer[2]; curr_hd[2] = commit_buffer[3]; } talk_queue_unlock(); } /***************** Public routines *****************/ /* stop the playback and the pending clips */ void talk_force_shutup(void) { /* Most of this is MAS only */ #if CONFIG_CODEC != SWCODEC #ifdef SIMULATOR return; #endif unsigned char* pos; unsigned char* search; unsigned char* end; int len; if (QUEUE_LEVEL == 0) /* has ended anyway */ return; #if CONFIG_CPU == SH7034 CHCR3 &= ~0x0001; /* disable the DMA (and therefore the interrupt also) */ #endif /* CONFIG_CPU == SH7034 */ /* search next frame boundary and continue up to there */ pos = search = mp3_get_pos(); end = buflib_get_data(&clip_ctx, queue[queue_read].handle); len = queue[queue_read].length; if (pos >= end && pos <= (end+len)) /* really our clip? 
 */
    {   /* (for strange reasons this isn't necessarily the case) */
        /* find the next frame boundary */
        while (search < (end+len)) /* search the remaining data */
        {
            if (*search++ != 0xFF) /* quick search for frame sync byte */
                continue; /* (this does the majority of the job) */

            /* look at the (bitswapped) rest of header candidate */
            if (search[0] == curr_hd[0] /* do the quicker checks first */
              && search[2] == curr_hd[2]
              && (search[1] & 0x30) == (curr_hd[1] & 0x30)) /* sample rate */
            {
                search--; /* back to the sync byte */
                break; /* From looking at it, this is our header. */
            }
        }

        if (search-pos)
        {   /* play old data until the frame end, to keep the MAS in sync */
            sent = search-pos;

            queue_write = (queue_read + 1) & QUEUE_MASK; /* will be empty after next callback */
            queue[queue_read].length = sent; /* current one ends after this */

#if CONFIG_CPU == SH7034
            DTCR3 = sent; /* let the DMA finish this frame */
            CHCR3 |= 0x0001; /* re-enable DMA */
#endif /* CONFIG_CPU == SH7034 */
            thumbnail_buf_used = 0;
            return;
        }
    }
#endif /* CONFIG_CODEC != SWCODEC */

    /* Either SWCODEC, or MAS had nothing to do (was frame boundary or not our clip) */
    mp3_play_stop();
    talk_queue_lock();
    queue_write = queue_read = 0; /* reset the queue */
    thumbnail_buf_used = 0;
    talk_queue_unlock();
    need_shutup = false;
}

/* Shutup the voice, except if force_enqueue_next is set. */
void talk_shutup(void)
{
    if (need_shutup && !force_enqueue_next)
        talk_force_shutup();
}
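
/* The scheduled clips live in a ring buffer of QUEUE_SIZE entries.  The
 * application advances queue_write when scheduling a clip, the playback
 * callback advances queue_read when a clip has been consumed, and
 * QUEUE_LEVEL is their distance modulo QUEUE_SIZE.  As a worked example
 * with QUEUE_SIZE == 64: queue_read == 63 and queue_write == 2 gives
 * (2 - 63) & 63 == 3, i.e. three clips are pending. */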
/* schedule a clip, at the end or discard the existing queue */
static void queue_clip(struct queue_entry *clip, bool enqueue)
{
    struct queue_entry *qe;
    int queue_level;

    if (!enqueue)
        talk_shutup(); /* cut off all the pending stuff */
    /* Something is being enqueued, force_enqueue_next override is no longer
       in effect. */
    force_enqueue_next = false;

    if (!clip->length)
        return; /* safety check */
#if CONFIG_CPU == SH7034
    /* disable the DMA temporarily, to be safe against a race condition */
    CHCR3 &= ~0x0001;
#endif
    talk_queue_lock();
    queue_level = QUEUE_LEVEL; /* check old level */
    qe = &queue[queue_write];

    if (queue_level < QUEUE_SIZE - 1) /* space left? */
    {
        queue[queue_write] = *clip;
        queue_write = (queue_write + 1) & QUEUE_MASK;
    }
    talk_queue_unlock();

    if (queue_level == 0)
    {   /* queue was empty, we have to do the initial start */
        size_t size;
        void *buf = commit_transfer(qe, &size);
        last_clip = qe;
        mp3_play_data(buf, size, mp3_callback);
        curr_hd[0] = commit_buffer[1];
        curr_hd[1] = commit_buffer[2];
        curr_hd[2] = commit_buffer[3];
        mp3_play_pause(true); /* kickoff audio */
    }
    else
    {
#if CONFIG_CPU == SH7034
        CHCR3 |= 0x0001; /* re-enable DMA */
#endif
    }

    need_shutup = true;

    return;
}

/* return if a voice codec is required or not */
static bool talk_voice_required(void)
{
    return (has_voicefile) /* Voice file is available */
        || (global_settings.talk_dir_clip) /* Thumbnail clips are required */
        || (global_settings.talk_file_clip);
}

/***************** Public implementation *****************/

void talk_init(void)
{
    int filehandle;

    talk_temp_disable_count = 0;
    if (talk_initialized && !strcasecmp(last_lang, global_settings.lang_file))
    {
        /* not a new file, nothing to do */
        return;
    }

    if(!talk_initialized)
    {
#if CONFIG_CODEC == SWCODEC
        mutex_init(&queue_mutex);
#endif /* CONFIG_CODEC == SWCODEC */
        mutex_init(&read_buffer_mutex);
    }

    talk_initialized = true;
    strlcpy((char *)last_lang, (char *)global_settings.lang_file, MAX_FILENAME);

    /* reset some states */
    queue_write = queue_read = 0; /* reset the queue */
    memset(&voicefile, 0, sizeof(voicefile));
    silence.handle = -1; /* pause clip not accessible */
    voicefile_size = has_voicefile = 0;

    /* need to free these as their size depends on the voice file, and
     * this function is called when the talk voice file changes */
    if (index_handle > 0)
        index_handle = core_free(index_handle);
    if (talk_handle > 0)
        talk_handle = core_free(talk_handle);
    /* don't free thumb handle, it doesn't depend on the actual voice file
     * and so we can re-use it if it's already allocated in any event */

    filehandle = open_voicefile();
    if (filehandle > 0)
    {
        if (!load_voicefile_index(filehandle))
        {
            splashf(HZ * 2, str(LANG_READ_FAILED), ".voice");
            goto out;
        }

        /* Now determine the maximum buffer size needed for the voicefile.
         * The below pretends the entire voicefile would be loaded. The buffer
         * size is eventually capped later on in load_voicefile_data() */
        int num_clips = voicefile.id1_max + voicefile.id2_max;
        int non_empty = num_clips;
        int total_size = 0, avg_size;
        struct clip_entry *clips = core_get_data(index_handle);
        /* check for the average clip size to estimate the maximum number of
         * clips the buffer can hold */
        for (int i = 0; i < num_clips; i++)
        {
            int size = clips[i].size;
            if (size == 0)
                non_empty -= 1;
            else
                total_size += size;
        }
        avg_size = total_size / non_empty;
        /* rough estimate of how many clips (plus thumbnails) fit into the
         * clip buffer at a time */
        max_clips = MIN(MAX_CLIP_BUFFER_SIZE, total_size) / avg_size + 1;
        max_clips += THUMBNAIL_RESERVE / avg_size;
        voicefile_size = total_size + THUMBNAIL_RESERVE;
        has_voicefile = true;
    }

out:
    close(filehandle); /* close again, this was just to detect presence */
}

/* control whether the buffers may be given away in shrink_callback() */
void talk_buffer_set_policy(int policy)
{
    switch(policy)
    {
        case TALK_BUFFER_DEFAULT:
        case TALK_BUFFER_HOLD:  give_buffer_away = false; break;
        case TALK_BUFFER_LOOSE: give_buffer_away = true;  break;
        default:                DEBUGF("Ignoring unknown policy\n"); break;
    }
}

/* play a voice ID from the voicefile */
int talk_id(int32_t id, bool enqueue)
{
    int32_t unit;
    int decimals;
    struct queue_entry clip;

    if (!has_voicefile)
        return 0; /* no voicefile loaded, not an error -> pretend success */
    if (talk_temp_disable_count > 0)
        return -1; /* talking has been disabled */
    if (!check_audio_status())
        return -1;

    if (talk_handle <= 0 || index_handle <= 0) /* reload needed?
*/ { int fd = open_voicefile(); if (fd < 0 || !load_voicefile_index(fd)) return -1; load_voicefile_data(fd); close(fd); } if (id == -1) /* -1 is an indication for silence */ return -1; decimals = (((uint32_t)id) >> DECIMAL_SHIFT) & 0x7; /* check if this is a special ID, with a value */ unit = ((uint32_t)id) >> UNIT_SHIFT; if (unit || decimals) { /* sign-extend the value */ id = (uint32_t)id << (32-DECIMAL_SHIFT); id >>= (32-DECIMAL_SHIFT); talk_value_decimal(id, unit, decimals, enqueue); /* speak it */ return 0; /* and stop, end of special case */ } if (get_clip(id, &clip) < 0) return -1; /* not present */ #ifdef LOGF_ENABLE if (id > VOICEONLY_DELIMITER) logf("\ntalk_id: Say voice clip 0x%x\n", id); else logf("\ntalk_id: Say '%s'\n", str(id)); #endif queue_clip(&clip, enqueue); return 0; } /* Speaks zero or more IDs (from an array). */ int talk_idarray(const long *ids, bool enqueue) { int r; if(!ids) return 0; while(*ids != TALK_FINAL_ID) { if((r = talk_id(*ids++, enqueue)) <0) return r; enqueue = true; } return 0; } /* Make sure the current utterance is not interrupted by the next one. */ void talk_force_enqueue_next(void) { force_enqueue_next = true; } /* play a thumbnail from file */ /* Returns size of spoken thumbnail, so >0 means something is spoken, <=0 means something went wrong. */ static int _talk_file(const char* filename, const long *prefix_ids, bool enqueue) { int fd; int size; int handle, oldest = -1; #if CONFIG_CODEC != SWCODEC struct mp3entry info; #endif /* reload needed? */ if (talk_temp_disable_count > 0) return -1; /* talking has been disabled */ if (!check_audio_status()) return -1; if (talk_handle <= 0 || index_handle <= 0) { int fd = open_voicefile(); if (fd < 0 || !load_voicefile_index(fd)) return -1; load_voicefile_data(fd); close(fd); } #if CONFIG_CODEC != SWCODEC if(mp3info(&info, filename)) /* use this to find real start */ { return 0; /* failed to open, or invalid */ } #endif if (!enqueue) /* shutup now to free the thumbnail buffer */ talk_shutup(); fd = open(filename, O_RDONLY); if (fd < 0) /* failed to open */ { return 0; } size = filesize(fd); #if CONFIG_CODEC != SWCODEC size -= lseek(fd, info.first_frame_offset, SEEK_SET); /* behind ID data */ #endif /* free clips from cache until this one succeeds to allocate */ while ((handle = buflib_alloc(&clip_ctx, size)) < 0) oldest = free_oldest_clip(); size = read_to_handle_ex(fd, &clip_ctx, handle, 0, size); close(fd); /* ToDo: find audio, skip ID headers and trailers */ if (size > 0) /* Don't play missing clips */ { struct queue_entry clip; clip.handle = handle; clip.length = clip.remaining = size; #if CONFIG_CODEC != SWCODEC && !defined(SIMULATOR) /* bitswap doesnt yield() */ bitswap(buflib_get_data(&clip_ctx, handle), size); #endif if(prefix_ids) /* prefix thumbnail by speaking these ids, but only now that we know there's actually a thumbnail to be spoken. */ talk_idarray(prefix_ids, true); /* finally insert into metadata table. thumb clips go under the * VOICEONLY_DELIMITER id so the cache can distinguish them from * normal clips */ add_cache_entry(handle, oldest, VOICEONLY_DELIMITER); queue_clip(&clip, true); } else buflib_free(&clip_ctx, handle); return size; } int talk_file(const char *root, const char *dir, const char *file, const char *ext, const long *prefix_ids, bool enqueue) /* Play a thumbnail file */ { char buf[MAX_PATH]; /* Does root end with a slash */ char *slash = (root && root[0] && root[strlen(root)-1] != '/') ? "/" : ""; snprintf(buf, MAX_PATH, "%s%s%s%s%s%s", root ? root : "", slash, dir ? 
dir : "", dir ? "/" : "", file ? file : "", ext ? ext : ""); return _talk_file(buf, prefix_ids, enqueue); } static int talk_spell_basename(const char *path, const long *prefix_ids, bool enqueue) { if(prefix_ids) { talk_idarray(prefix_ids, enqueue); enqueue = true; } char buf[MAX_PATH]; /* Spell only the path component after the last slash */ strlcpy(buf, path, sizeof(buf)); if(strlen(buf) >1 && buf[strlen(buf)-1] == '/') /* strip trailing slash */ buf[strlen(buf)-1] = '\0'; char *ptr = strrchr(buf, '/'); if(ptr && strlen(buf) >1) ++ptr; else ptr = buf; return talk_spell(ptr, enqueue); } /* Play a file's .talk thumbnail, fallback to spelling the filename, or go straight to spelling depending on settings. */ int talk_file_or_spell(const char *dirname, const char *filename, const long *prefix_ids, bool enqueue) { if (global_settings.talk_file_clip) { /* .talk clips enabled */ if(talk_file(dirname, NULL, filename, file_thumbnail_ext, prefix_ids, enqueue) >0) return 0; } if (global_settings.talk_file == 2) /* Either .talk clips are disabled, or as a fallback */ return talk_spell_basename(filename, prefix_ids, enqueue); return 0; } /* Play a directory's .talk thumbnail, fallback to spelling the filename, or go straight to spelling depending on settings. */ int talk_dir_or_spell(const char* dirname, const long *prefix_ids, bool enqueue) { if (global_settings.talk_dir_clip) { /* .talk clips enabled */ if(talk_file(dirname, NULL, dir_thumbnail_name, NULL, prefix_ids, enqueue) >0) return 0; } if (global_settings.talk_dir == 2) /* Either .talk clips disabled or as a fallback */ return talk_spell_basename(dirname, prefix_ids, enqueue); return 0; } /* Speak thumbnail for each component of a full path, again falling back or going straight to spelling depending on settings. */ int talk_fullpath(const char* path, bool enqueue) { if (!enqueue) talk_shutup(); if(path[0] != '/') /* path ought to start with /... */ return talk_spell(path, true); talk_id(VOICE_CHAR_SLASH, true); char buf[MAX_PATH]; strlcpy(buf, path, MAX_PATH); char *start = buf+1; /* start of current component */ char *ptr = strchr(start, '/'); /* end of current component */ while(ptr) { /* There are more slashes ahead */ /* temporarily poke a NULL at end of component to truncate string */ *ptr = '\0'; talk_dir_or_spell(buf, NULL, true); *ptr = '/'; /* restore string */ talk_id(VOICE_CHAR_SLASH, true); start = ptr+1; /* setup for next component */ ptr = strchr(start, '/'); } /* no more slashes, final component is a filename */ return talk_file_or_spell(NULL, buf, NULL, true); } /* say a numeric value, this word ordering works for english, but not necessarily for other languages (e.g. 
german) */ int talk_number(long n, bool enqueue) { int level = 2; /* mille count */ long mil = 1000000000; /* highest possible "-illion" */ if (talk_temp_disable_count > 0) return -1; /* talking has been disabled */ if (!check_audio_status()) return -1; if (!enqueue) talk_shutup(); /* cut off all the pending stuff */ if (n==0) { /* special case */ talk_id(VOICE_ZERO, true); return 0; } if (n<0) { talk_id(VOICE_MINUS, true); n = -n; } while (n) { int segment = n / mil; /* extract in groups of 3 digits */ n -= segment * mil; /* remove the used digits from number */ mil /= 1000; /* digit place for next round */ if (segment) { int hundreds = segment / 100; int ones = segment % 100; if (hundreds) { talk_id(VOICE_ZERO + hundreds, true); talk_id(VOICE_HUNDRED, true); } /* combination indexing */ if (ones > 20) { int tens = ones/10 + 18; talk_id(VOICE_ZERO + tens, true); ones %= 10; } /* direct indexing */ if (ones) talk_id(VOICE_ZERO + ones, true); /* add billion, million, thousand */ if (mil) talk_id(VOICE_THOUSAND + level, true); } level--; } return 0; } /* Say year like "nineteen ninety nine" instead of "one thousand 9 hundred ninety nine". */ static int talk_year(long year, bool enqueue) { int rem; if(year < 1100 || year >=2000) /* just say it as a regular number */ return talk_number(year, enqueue); /* Say century */ talk_number(year/100, enqueue); rem = year%100; if(rem == 0) /* as in 1900 */ return talk_id(VOICE_HUNDRED, true); if(rem <10) /* as in 1905 */ talk_id(VOICE_ZERO, true); /* sub-century year */ return talk_number(rem, true); } /* Say time duration/interval. Input is time in seconds, say hours,minutes,seconds. */ static int talk_time_unit(long secs, bool enqueue) { return talk_time_intervals(secs, UNIT_SEC, enqueue); } void talk_fractional(char *tbuf, int value, int unit) { int i; /* strip trailing zeros from the fraction */ for (i = strlen(tbuf) - 1; (i >= 0) && (tbuf[i] == '0'); i--) tbuf[i] = '\0'; talk_number(value, true); if (tbuf[0] != 0) { talk_id(LANG_POINT, true); talk_spell(tbuf, true); } talk_id(unit, true); } int talk_value(long n, int unit, bool enqueue) { return talk_value_decimal(n, unit, 0, enqueue); } /* singular/plural aware saying of a value */ int talk_value_decimal(long n, int unit, int decimals, bool enqueue) { int unit_id; static const int unit_voiced[] = { /* lookup table for the voice ID of the units */ [0 ... UNIT_LAST-1] = -1, /* regular ID, int, signed */ [UNIT_MS] = VOICE_MILLISECONDS, /* here come the "real" units */ [UNIT_SEC] = VOICE_SECONDS, [UNIT_MIN] = VOICE_MINUTES, [UNIT_HOUR] = VOICE_HOURS, [UNIT_KHZ] = VOICE_KHZ, [UNIT_DB] = VOICE_DB, [UNIT_PERCENT] = VOICE_PERCENT, [UNIT_MAH] = VOICE_MILLIAMPHOURS, [UNIT_PIXEL] = VOICE_PIXEL, [UNIT_PER_SEC] = VOICE_PER_SEC, [UNIT_HERTZ] = VOICE_HERTZ, [UNIT_MB] = LANG_MEBIBYTE, [UNIT_KBIT] = VOICE_KBIT_PER_SEC, [UNIT_PM_TICK] = VOICE_PM_UNITS_PER_TICK, }; static const int pow10[] = { /* 10^0 - 10^7 */ 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000 }; char tbuf[8]; char fmt[] = "%0nd"; if (talk_temp_disable_count > 0) return -1; /* talking has been disabled */ if (!check_audio_status()) return -1; /* special pronounciation for year number */ if (unit == UNIT_DATEYEAR) return talk_year(n, enqueue); /* special case for time duration */ if (unit == UNIT_TIME) return talk_time_unit(n, enqueue); if (unit < 0 || unit >= UNIT_LAST) unit_id = -1; else unit_id = unit_voiced[unit]; if ((n==1 || n==-1) /* singular? 
*/ && unit_id >= VOICE_SECONDS && unit_id <= VOICE_HOURS) { unit_id--; /* use the singular for those units which have */ } /* special case with a "plus" before */ if (n > 0 && (unit == UNIT_SIGNED || unit == UNIT_DB)) { talk_id(VOICE_PLUS, enqueue); enqueue = true; } if (decimals) { /* needed for the "-0.5" corner case */ if (n < 0) { talk_id(VOICE_MINUS, enqueue); n = -n; } fmt[2] = '0' + decimals; snprintf(tbuf, sizeof(tbuf), fmt, n % pow10[decimals]); talk_fractional(tbuf, n / pow10[decimals], unit_id); return 0; } talk_number(n, enqueue); /* say the number */ talk_id(unit_id, true); /* say the unit, if any */ return 0; } /* Say time duration/interval. Input is time unit specifies base unit, say hours,minutes,seconds, milliseconds. or any combination thereof */ int talk_time_intervals(long time, int unit_idx, bool enqueue) { unsigned long units_in[UNIT_IDX_TIME_COUNT]; if (talk_temp_disable_count > 0) return -1; /* talking has been disabled */ if (!check_audio_status()) return -1; if (talk_handle <= 0 || index_handle <= 0) /* reload needed? */ { int fd = open_voicefile(); if (fd < 0 || !load_voicefile_index(fd)) return -1; load_voicefile_data(fd); close(fd); } if (!enqueue) talk_shutup(); /* cut off all the pending stuff */ time_split_units(unit_idx, labs(time), &units_in); if (time < 0) talk_id(VOICE_MINUS, true); if (time == 0) talk_value(0, unit_idx, true); else { if (units_in[UNIT_IDX_HR] != 0) { talk_value(units_in[UNIT_IDX_HR], UNIT_HOUR, true); } if (units_in[UNIT_IDX_MIN] != 0) { talk_value(units_in[UNIT_IDX_MIN], UNIT_MIN, true); } if (units_in[UNIT_IDX_SEC] != 0) { talk_value(units_in[UNIT_IDX_SEC], UNIT_SEC, true); } if (units_in[UNIT_IDX_MS] != 0) { talk_value(units_in[UNIT_IDX_MS], UNIT_MS, true); } } return -1; } /* spell a string */ int talk_spell(const char* spell, bool enqueue) { char c; /* currently processed char */ if (talk_temp_disable_count > 0) return -1; /* talking has been disabled */ if (!check_audio_status()) return -1; if (!enqueue) talk_shutup(); /* cut off all the pending stuff */ while ((c = *spell++) != '\0') { /* if this grows into too many cases, I should use a table */ if (c >= 'A' && c <= 'Z') talk_id(VOICE_CHAR_A + c - 'A', true); else if (c >= 'a' && c <= 'z') talk_id(VOICE_CHAR_A + c - 'a', true); else if (c >= '0' && c <= '9') talk_id(VOICE_ZERO + c - '0', true); else if (c == '-') talk_id(VOICE_MINUS, true); else if (c == '+') talk_id(VOICE_PLUS, true); else if (c == '.') talk_id(VOICE_DOT, true); else if (c == ' ') talk_id(VOICE_PAUSE, true); else if (c == '/') talk_id(VOICE_CHAR_SLASH, true); } return 0; } void talk_disable(bool disable) { if (disable) talk_temp_disable_count++; else talk_temp_disable_count--; } void talk_setting(const void *global_settings_variable) { const struct settings_list *setting; if (!global_settings.talk_menu) return; setting = find_setting(global_settings_variable, NULL); if (setting == NULL) return; if (setting->lang_id) talk_id(setting->lang_id,false); } void talk_date(const struct tm *tm, bool enqueue) { talk_id(LANG_MONTH_JANUARY + tm->tm_mon, enqueue); talk_number(tm->tm_mday, true); talk_number(1900 + tm->tm_year, true); } void talk_time(const struct tm *tm, bool enqueue) { if (global_settings.timeformat == 1) { /* Voice the hour */ long am_pm_id = VOICE_AM; int hour = tm->tm_hour; if (hour >= 12) { am_pm_id = VOICE_PM; hour -= 12; } if (hour == 0) hour = 12; talk_number(hour, enqueue); /* Voice the minutes */ if (tm->tm_min == 0) { /* Say o'clock if the minute is 0. 
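                 * (e.g. 5:00 PM is spoken as "five", "o'clock", "PM")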
*/ talk_id(VOICE_OCLOCK, true); } else { /* Pronounce the leading 0 */ if(tm->tm_min < 10) talk_id(VOICE_OH, true); talk_number(tm->tm_min, true); } talk_id(am_pm_id, true); } else { /* Voice the time in 24 hour format */ talk_number(tm->tm_hour, enqueue); if (tm->tm_min == 0) talk_ids(true, VOICE_HUNDRED, VOICE_HOUR); else { /* Pronounce the leading 0 */ if(tm->tm_min < 10) talk_id(VOICE_OH, true); talk_number(tm->tm_min, true); } } } bool talk_get_debug_data(struct talk_debug_data *data) { char* p_lang = DEFAULT_VOICE_LANG; /* default */ struct clip_cache_metadata *cc; memset(data, 0, sizeof(*data)); data->status = talk_status; if (global_settings.lang_file[0] && global_settings.lang_file[0] != 0xff) p_lang = (char *)global_settings.lang_file; strlcpy(data->voicefile, p_lang, sizeof(data->voicefile)); if (!has_voicefile || index_handle <= 0) { if (data->status == TALK_STATUS_OK) data->status = TALK_STATUS_ERR_NOFILE; return false; } struct clip_entry *clips = core_get_data(index_handle); int cached = 0; int real_clips = 0; data->num_clips = voicefile.id1_max + voicefile.id2_max; data->avg_clipsize = data->max_clipsize = 0; data->min_clipsize = INT_MAX; for(int i = 0; i < data->num_clips; i++) { int size = clips[i].size & (~LOADED_MASK); if (!size) continue; real_clips += 1; if (size < data->min_clipsize) data->min_clipsize = size; if (size > data->max_clipsize) data->max_clipsize = size; data->avg_clipsize += size; } cc = buflib_get_data(&clip_ctx, metadata_table_handle); for (int i = 0; i < (int) max_clips; i++) { if (cc[i].handle > 0) cached += 1; } data->avg_clipsize /= real_clips; data->num_empty_clips = data->num_clips - real_clips; data->memory_allocated = sizeof(commit_buffer) + sizeof(voicefile) + data->num_clips * sizeof(struct clip_entry) + voicebuf_size; data->memory_used = 0; if (talk_handle > 0) data->memory_used = data->memory_allocated - buflib_available(&clip_ctx); data->cached_clips = cached; data->cache_hits = cache_hits; data->cache_misses = cache_misses; return true; }
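
/* Illustrative usage sketch of the public API above (values and the path are
 * only examples; IDs are the usual VOICE_* / LANG_* constants):
 *
 *   talk_number(42, false);                      -- "forty two", flushing the queue
 *   talk_id(VOICE_PAUSE, true);                  -- enqueue a short pause
 *   talk_value_decimal(1500, UNIT_SEC, 3, true); -- "one point five seconds"
 *   talk_spell("rockbox", true);                 -- spelled out letter by letter
 *   talk_file_or_spell("/MUSIC", "song.mp3", NULL, true);
 *                      -- speaks /MUSIC/song.mp3.talk or spells the name,
 *                         depending on the talk_file settings
 */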