rockbox/apps/metadata.c
Dave Bryant 2c28390972 Added code to handle APEv2 tags for WavPack files, although this should be
applicable to Musepack files (and even MP3 files sometimes). Perhaps this
should be integrated with the ID3 tag stuff at some point?


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6769 a1c6a512-1295-4272-9138-f99709370657
2005-06-19 20:27:46 +00:00

705 lines
22 KiB
C

/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2005 Dave Chapman
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "metadata.h"
#include "mp3_playback.h"
#include "mp3data.h"
#include "logf.h"
#include "atoi.h"
/* Guess the audio format of a file purely from its filename extension.
 *
 * filename: path or name of the file; only the text after the last '.'
 *           is examined, compared case-insensitively.
 *
 * Returns the AFMT_* code associated with the extension, or AFMT_UNKNOWN
 * when the name has no '.' or the extension is not in the table below.
 */
int probe_file_format(const char *filename)
{
    /* Searched front to back; the first matching entry wins. */
    static const struct {
        const char *ext;
        int format;
    } known_types[] = {
        { "mp1",  AFMT_MPA_L1 },
        { "mp2",  AFMT_MPA_L2 },
        { "mpa",  AFMT_MPA_L2 },
        { "mp3",  AFMT_MPA_L3 },
        { "ogg",  AFMT_OGG_VORBIS },
        { "wav",  AFMT_PCM_WAV },
        { "flac", AFMT_FLAC },
        { "mpc",  AFMT_MPC },
        { "aac",  AFMT_AAC },
        { "ape",  AFMT_APE },
        { "wma",  AFMT_WMA },
        { "a52",  AFMT_A52 },
        { "ac3",  AFMT_A52 },
        { "rm",   AFMT_REAL },
        { "wv",   AFMT_WAVPACK },
    };
    const char *ext;
    unsigned int n;

    ext = strrchr(filename, '.');
    if (ext == NULL)
        return AFMT_UNKNOWN;

    ext++; /* step over the dot itself */

    for (n = 0; n < sizeof(known_types) / sizeof(known_types[0]); n++) {
        if (strcasecmp(known_types[n].ext, ext) == 0)
            return known_types[n].format;
    }

    return AFMT_UNKNOWN;
}
/* A52/AC3 bitrate table.  get_metadata() indexes it with (frmsizecod >> 1)
   and stores the result directly in id3.bitrate. */
unsigned short a52_bitrates[] = {
     32,  40,  48,  56,  64,
     80,  96, 112, 128, 160,
    192, 224, 256, 320, 384,
    448, 512, 576, 640
};
/* A52/AC3 frame sizes at 44.1KHz, used by get_metadata() as bytes per
   frame and indexed by the frame size code.  Only 44.1KHz needs a table -
   at the other sample rates the frame size is simply a multiple of the
   bitrate.  Each row holds the two consecutive entries that belong to one
   entry of a52_bitrates[]. */
unsigned short a52_441framesizes[] = {
      69*2,   70*2,
      87*2,   88*2,
     104*2,  105*2,
     121*2,  122*2,
     139*2,  140*2,
     174*2,  175*2,
     208*2,  209*2,
     243*2,  244*2,
     278*2,  279*2,
     348*2,  349*2,
     417*2,  418*2,
     487*2,  488*2,
     557*2,  558*2,
     696*2,  697*2,
     835*2,  836*2,
     975*2,  976*2,
    1114*2, 1115*2,
    1253*2, 1254*2,
    1393*2, 1394*2
};
static bool get_apetag_info (struct mp3entry *entry, int fd);

/* Decode a 32-bit little-endian value from four raw bytes.  Each byte is
   widened before shifting so a set top bit never lands in the sign bit of
   an int. */
static unsigned long metadata_get_le32(const unsigned char *p)
{
    return (unsigned long)p[0] |
           ((unsigned long)p[1] << 8) |
           ((unsigned long)p[2] << 16) |
           ((unsigned long)p[3] << 24);
}

/* Convert a sample count to milliseconds.  Returns 0 for a zero sample
   rate instead of dividing by it.  Whole seconds and the sub-second
   remainder are scaled separately so the result keeps millisecond
   resolution without multiplying the full sample count by 1000. */
static unsigned long metadata_samples_to_ms(unsigned long samples,
                                            unsigned long frequency)
{
    if (frequency == 0)
        return 0;

    return (samples / frequency) * 1000 +
           ((samples % frequency) * 1000) / frequency;
}

/* Copy the track name into id3.path, always leaving it NUL-terminated
   (strncpy alone does not terminate when the source fills the buffer). */
static void metadata_store_path(struct track_info *track,
                                const char *trackname)
{
    strncpy(track->id3.path, trackname, sizeof(track->id3.path));
    track->id3.path[sizeof(track->id3.path) - 1] = '\0';
}

/* Get metadata for track - return false if parsing showed problems with the
   file that would prevent playback.

   track     - track->id3.codectype selects the parser; the id3 fields
               (and mp3data for MPEG audio) are filled in and
               taginfo_ready is set on success.
   fd        - open descriptor for the file.  Every parser that reads
               through fd seeks it back to offset 0 before a successful
               return.
   trackname - file name; stored in id3.path by all non-MPEG parsers.
   v1first   - passed through to mp3info() for MPEG audio only.

   While a header is being parsed, id3.path doubles as the scratch read
   buffer; it is overwritten with trackname once parsing is done. */
bool get_metadata(struct track_info* track, int fd, const char* trackname,
                  bool v1first)
{
    unsigned long totalsamples, bytespersample, channels, bitspersample;
    unsigned long numbytes, blocklen, chunk;
    unsigned char *buf;
    int bufsize;
    int bytesperframe;
    int blocktype, lastblock;
    int i, j;
    int rc;

    /* Use the trackname part of the id3 structure as a temporary buffer */
    buf = (unsigned char *)track->id3.path;
    bufsize = sizeof(track->id3.path);

    /* Load codec specific track tag information. */
    switch (track->id3.codectype) {
    case AFMT_MPA_L1:
    case AFMT_MPA_L2:
    case AFMT_MPA_L3:
        /* Should check the return value. */
        mp3info(&track->id3, trackname, v1first);
        lseek(fd, 0, SEEK_SET);

        /* This is too slow to execute on some files. */
        get_mp3file_info(fd, &track->mp3data);
        lseek(fd, 0, SEEK_SET);

        /*
        logf("T:%s", track->id3.title);
        logf("L:%d", track->id3.length);
        logf("O:%d", track->id3.first_frame_offset);
        logf("F:%d", track->id3.frequency);
        */
        track->taginfo_ready = true;
        break;

    case AFMT_PCM_WAV:
        lseek(fd, 0, SEEK_SET);

        rc = read(fd, buf, 44);
        if (rc < 44) {
            return false;
        }

        if ((memcmp(buf, "RIFF", 4) != 0) ||
            (memcmp(&buf[8], "WAVEfmt", 7) != 0)) {
            logf("%s is not a WAV file\n",trackname);
            return false;
        }

        /* FIX: Correctly parse WAV header - we assume canonical
           44-byte header */
        bitspersample = buf[34];
        channels = buf[22];

        if ((bitspersample != 16) || (channels != 2)) {
            /* Cast so the arguments really match the %d conversions. */
            logf("Unsupported WAV file - %d bitspersample, %d channels\n",
                 (int)bitspersample, (int)channels);
            return false;
        }

        /* Non-zero here: only 16-bit stereo got past the check above. */
        bytespersample = (bitspersample / 8) * channels;
        numbytes = metadata_get_le32(&buf[40]);
        totalsamples = numbytes / bytespersample;

        track->id3.vbr = false; /* All WAV files are CBR */
        track->id3.filesize = filesize(fd);
        track->id3.frequency = metadata_get_le32(&buf[24]);

        /* Calculate track length (in ms) and estimate the bitrate
           (in kbit/s) */
        track->id3.length = metadata_samples_to_ms(totalsamples,
                                                   track->id3.frequency);
        track->id3.bitrate = (track->id3.frequency * bytespersample)
                             / (1000 / 8);

        lseek(fd, 0, SEEK_SET);
        metadata_store_path(track, trackname);
        track->taginfo_ready = true;
        break;

    case AFMT_FLAC:
        /* A simple parser to read vital metadata from a FLAC file -
           length, frequency, bitrate etc.  This code should either be
           moved to a separate file, or discarded in favour of the libFLAC
           code.  The FLAC stream specification can be found at
           http://flac.sourceforge.net/format.html#stream */
        lseek(fd, 0, SEEK_SET);

        rc = read(fd, buf, 4);
        if (rc < 4) {
            return false;
        }

        if (memcmp(buf, "fLaC", 4) != 0) {
            logf("%s is not a FLAC file\n",trackname);
            return false;
        }

        do {
            /* Each metadata block starts with a 4-byte header: one byte
               holding the last-block flag and block type, then a 24-bit
               big-endian length. */
            rc = read(fd, buf, 4);
            if (rc < 4) {
                /* The file ended before a block flagged as last was seen,
                   so there is no audio data to play. */
                return false;
            }

            /* Remember these now - buf is reused for the block body. */
            lastblock = buf[0] & 0x80;
            blocktype = buf[0] & 0x7f;
            blocklen = ((unsigned long)buf[1] << 16) |
                       ((unsigned long)buf[2] << 8) |
                       (unsigned long)buf[3];

            if (blocktype == 0) { /* 0 is the STREAMINFO block */
                /* Never read more than the scratch buffer can hold; the
                   length comes from the file and cannot be trusted. */
                chunk = blocklen;
                if (chunk > (unsigned long)bufsize)
                    chunk = (unsigned long)bufsize;

                /* The fields used below occupy bytes 10..17. */
                if (chunk < 18) {
                    return false;
                }

                rc = read(fd, buf, chunk);
                if (rc < (int)chunk) {
                    return false;
                }

                /* Step over whatever part of the block did not fit. */
                if (blocklen > chunk)
                    lseek(fd, blocklen - chunk, SEEK_CUR);

                track->id3.vbr = true; /* All FLAC files are VBR */
                track->id3.filesize = filesize(fd);
                track->id3.frequency = (buf[10] << 12) | (buf[11] << 4) |
                                       ((buf[12] & 0xf0) >> 4);

                /* NOT NEEDED: bitspersample=(((buf[12]&0x01)<<4)|((buf[13]&0xf0)>>4))+1; */

                /* totalsamples is a 36-bit field, but we assume <= 32 bits
                   are used */
                totalsamples = ((unsigned long)buf[14] << 24) |
                               ((unsigned long)buf[15] << 16) |
                               ((unsigned long)buf[16] << 8) |
                               (unsigned long)buf[17];

                /* Calculate track length (in ms) and estimate the bitrate
                   (in kbit/s).  A zero length (unknown sample count or
                   sample rate) yields a zero bitrate rather than a
                   division by zero. */
                track->id3.length =
                    metadata_samples_to_ms(totalsamples,
                                           track->id3.frequency);
                track->id3.bitrate = track->id3.length ?
                    (filesize(fd) * 8) / track->id3.length : 0;
            } else {
                /* Everything else, including VORBIS_COMMENT (type 4), is
                   just skipped for now. */
                lseek(fd, blocklen, SEEK_CUR);
            }
        } while (!lastblock);

        lseek(fd, 0, SEEK_SET);
        metadata_store_path(track, trackname);
        track->taginfo_ready = true;
        break;

    case AFMT_OGG_VORBIS:
        /* A simple parser to read vital metadata from an Ogg Vorbis file */

        /* An Ogg File is split into pages, each starting with the string
           "OggS". Each page has a timestamp (in PCM samples) referred to as
           the "granule position".

           An Ogg Vorbis has the following structure:
           1) Identification header (containing samplerate, numchannels, etc)
           2) Comment header - containing the Vorbis Comments
           3) Setup header - containing codec setup information
           4) Many audio packets...
        */
        lseek(fd, 0, SEEK_SET);

        /* All 58 bytes are required: the checks and fields below reach up
           to buf[43], so a shorter read would parse stale buffer data. */
        rc = read(fd, buf, 58);
        if (rc < 58) {
            return false;
        }

        if ((memcmp(buf, "OggS", 4) != 0) ||
            (memcmp(&buf[29], "vorbis", 6) != 0)) {
            logf("%s is not an Ogg Vorbis file\n",trackname);
            return false;
        }

        /* Ogg stores integers in little-endian format. */
        track->id3.filesize = filesize(fd);
        track->id3.frequency = metadata_get_le32(&buf[40]);

        /* We now need to search for the last page in the file - identified
           by ('O','g','g','S',0) and retrieve totalsamples.  Only the final
           32KB is scanned; a file shorter than that is scanned from the
           start. */
        if (lseek(fd, -32*1024, SEEK_END) < 0)
            lseek(fd, 0, SEEK_SET);

        j = 0; /* The number of bytes currently in buffer */
        totalsamples = 0;

        for (;;) {
            rc = read(fd, &buf[j], bufsize - j);
            if (rc <= 0)
                break;
            j += rc;

            /* Inefficient (but simple) search.  A candidate is examined
               only when all ten bytes up to the end of the low granule
               word are in the buffer; the last match seen wins. */
            i = 0;
            while (i + 10 <= j) {
                if (memcmp(&buf[i], "OggS", 5) == 0) {
                    totalsamples = metadata_get_le32(&buf[i + 6]);
                    i += 10;
                } else {
                    i++;
                }
            }

            /* Keep the unexamined tail (at most 9 bytes) so that a page
               header straddling two reads is still found. */
            memmove(buf, &buf[i], j - i);
            j -= i;
        }

        track->id3.length = metadata_samples_to_ms(totalsamples,
                                                   track->id3.frequency);

        /* The following calculation should use datasize, not filesize
           (i.e. exclude comments etc).  A zero length yields a zero
           bitrate rather than a division by zero. */
        track->id3.bitrate = track->id3.length ?
            (filesize(fd) * 8) / track->id3.length : 0;
        track->id3.vbr = true;

        lseek(fd, 0, SEEK_SET);
        metadata_store_path(track, trackname);
        track->taginfo_ready = true;
        break;

    case AFMT_WAVPACK:
        /* A simple parser to read basic information from a WavPack file.
         * This will fail on WavPack files that don't have the WavPack header
         * as the first thing (i.e. self-extracting WavPack files) or WavPack
         * files that have so much extra RIFF data stored in the first block
         * that they don't have samples (very rare, I would think).
         */
        lseek(fd, 0, SEEK_SET);

        rc = read(fd, buf, 32);
        if (rc < 32) {
            return false;
        }

        if (memcmp (buf, "wvpk", 4) != 0 || buf [9] != 4 || buf [8] < 2) {
            logf ("%s is not a WavPack file\n", trackname);
            return false;
        }

        track->id3.vbr = true; /* All WavPack files are VBR */
        track->id3.filesize = filesize (fd);
        track->id3.frequency = 44100;

        /* Length and bitrate are only derived when bytes 20..23 are not
           all zero and bytes 12..15 (the sample count) are not all 0xff. */
        if ((buf [20] | buf [21] | buf [22] | buf [23]) &&
            (buf [12] & buf [13] & buf [14] & buf [15]) != 0xff) {
            totalsamples = metadata_get_le32(&buf[12]);
            track->id3.length = (totalsamples + 220) / 441 * 10;

            /* Tracks shorter than 8 ms would make the divisor zero. */
            if (track->id3.length / 8)
                track->id3.bitrate = filesize (fd) /
                                     (track->id3.length / 8);
            else
                track->id3.bitrate = 0;
        }

        get_apetag_info (&track->id3, fd); /* use any apetag info we find */

        lseek (fd, 0, SEEK_SET);
        metadata_store_path(track, trackname);
        track->taginfo_ready = true;
        break;

    case AFMT_A52:
        lseek(fd, 0, SEEK_SET);

        /* We just need the first 5 bytes */
        rc = read(fd, buf, 5);
        if (rc < 5) {
            return false;
        }

        if ((buf[0] != 0x0b) || (buf[1] != 0x77)) {
            logf("%s is not an A52/AC3 file\n",trackname);
            return false;
        }

        /* The mask drops the lowest bit, so i is always even and at most
           36 once validated; that keeps both table lookups in range. */
        i = buf[4] & 0x3e;
        if (i > 36) {
            logf("A52: Invalid frmsizecod: %d\n",i);
            return false;
        }

        track->id3.bitrate = a52_bitrates[i >> 1];
        track->id3.vbr = false;
        track->id3.filesize = filesize (fd);

        switch (buf[4] & 0xc0) {
        case 0x00:
            track->id3.frequency = 48000;
            bytesperframe = track->id3.bitrate * 2 * 2;
            break;

        case 0x40:
            track->id3.frequency = 44100;
            bytesperframe = a52_441framesizes[i];
            break;

        case 0x80:
            track->id3.frequency = 32000;
            bytesperframe = track->id3.bitrate * 3 * 2;
            break;

        default:
            logf("A52: Invalid samplerate code: 0x%02x\n",buf[4]&0xc0);
            return false;
        }

        /* One A52 frame contains 6 blocks, each containing 256 samples */
        totalsamples = (track->filesize / bytesperframe) * 6 * 256;
        track->id3.length = metadata_samples_to_ms(totalsamples,
                                                   track->id3.frequency);

        lseek(fd, 0, SEEK_SET);
        metadata_store_path(track, trackname);
        track->taginfo_ready = true;
        break;

    /* If we don't know how to read the metadata, just store the filename */
    default:
        metadata_store_path(track, trackname);
        track->taginfo_ready = true;
        break;
    }

    return true;
}
/************************* APE TAG HANDLING CODE ****************************/
/*
* This is a first pass at APEv2 tag handling. I'm not sure if this should
* reside here, but I wanted to modify as little as possible since I don't
* have a feel for the complete system. It may be that APEv2 tags should be
* added to the ID3 handling code in the firmware directory. APEv2 tags are
* used in WavPack files and Musepack files by default, however they are
* sometimes used in MP3 files as well (by Foobar2000). WavPack files can
* also use ID3v1 tags (but not ID3v2), so it seems like some universal tag
* handler might be a reasonable approach.
*
* This code does not currently handle APEv1 tags, but I believe that this
* is not a problem because they were only used in Monkey's Audio files which
* will probably never be playable in RockBox (and certainly not by this CPU).
*/
/* Format string handed to little_endian_to_native() for the 32-byte tag
   header: skip the 8 id bytes, then convert four 32-bit fields. */
#define APETAG_HEADER_FORMAT "8LLLL"
/* Number of bytes load_apetag() reads from the very end of the file when
   looking for the "APETAGEX" header. */
#define APETAG_HEADER_LENGTH 32
/* Size of the buffer holding the tag items; load_apetag() refuses tags
   whose length field is not below this value. */
#define APETAG_DATA_LIMIT 4096
/* In-memory image of the tag header, filled by reading the raw bytes
   straight into it.
   NOTE(review): this appears to rely on long being 4 bytes so that the
   struct is exactly APETAG_HEADER_LENGTH bytes - confirm for any new
   target. */
struct apetag_header {
/* "APETAGEX" for a valid tag; load_apetag() zeroes id[0] on failure and
   get_apetag_item() only searches when id[0] is 'A'. */
char id [8];
/* version must be 2000 to be accepted; length counts the item data plus
   these 32 header bytes; item_count bounds the item search; flags is
   converted but not inspected by the code in this file section. */
long version, length, item_count, flags;
/* Remaining 8 bytes of the 32-byte header; never examined here. */
char res [8];
};
/* A loaded tag: the converted header plus the raw item data.  temp_apetag
   is the single scratch instance used by get_apetag_info(). */
static struct apetag {
struct apetag_header header;
char data [APETAG_DATA_LIMIT];
} temp_apetag;
/* Forward declarations for the helpers defined further down. */
static int get_apetag_item (struct apetag *tag,
const char *item,
char *value,
int size);
static int load_apetag (int fd, struct apetag *tag);
static void UTF8ToAnsi (unsigned char *pUTF8);
/*
 * Fetch one string item from temp_apetag into the scratch area and hand it
 * to a metadata field.  On success *field points at the converted string,
 * and *cursor / *space are advanced past it (terminator included).  Nothing
 * happens when no more than one byte of space is left or the item is not
 * present in the tag.
 */
static void apetag_claim_string (const char *item, char **field,
                                 char **cursor, int *space)
{
    int used;

    if (*space <= 1)
        return;

    if (!get_apetag_item (&temp_apetag, item, *cursor, *space))
        return;

    *field = *cursor;
    UTF8ToAnsi ((unsigned char *) *cursor);

    /* Measured after the conversion, which may have shortened the text. */
    used = strlen (*cursor) + 1;
    *cursor += used;
    *space -= used;
}

/*
 * Look for an APEv2 tag in the given file and copy what it offers into the
 * mp3entry: "year" and "track" are parsed as numbers, while "artist",
 * "title", "album", "genre" and "composer" become strings.  The tag itself
 * is loaded into the static temp_apetag scratch structure; the strings that
 * must outlive this call are packed one after another into the entry's
 * "id3v2buf" field (which should not otherwise be used for a file carrying
 * an APEv2 tag), so their combined length is limited by the size of that
 * buffer.
 *
 * Returns false when id3v2buf cannot hold even one character or when no
 * tag could be loaded, true otherwise - even if none of the items exist.
 */
static bool get_apetag_info (struct mp3entry *entry, int fd)
{
    char *cursor = entry->id3v2buf;
    int space = sizeof (entry->id3v2buf);

    if (space <= 1)
        return false;

    if (!load_apetag (fd, &temp_apetag))
        return false;

    /* The numeric items only borrow the start of the scratch area; the
       string items below overwrite it. */
    if (get_apetag_item (&temp_apetag, "year", cursor, space))
        entry->year = atoi (cursor);

    if (get_apetag_item (&temp_apetag, "track", cursor, space))
        entry->tracknum = atoi (cursor);

    apetag_claim_string ("artist", &entry->artist, &cursor, &space);
    apetag_claim_string ("title", &entry->title, &cursor, &space);
    apetag_claim_string ("album", &entry->album, &cursor, &space);
    apetag_claim_string ("genre", &entry->genre_string, &cursor, &space);
    apetag_claim_string ("composer", &entry->composer, &cursor, &space);

    return true;
}
/*
 * Helper function to convert little-endian structures to easily usable native
 * format using a format string (this does nothing on a little-endian machine).
 *
 * data   - start of the raw little-endian bytes; converted in place.
 * format - one character per field, processed left to right:
 *            'L'       a 4-byte integer
 *            'S'       a 2-byte integer
 *            '0'-'9'   skip that many bytes untouched
 *          Any other character is ignored.
 *
 * The conversion swaps bytes in place and never stores through a long or
 * short pointer.  Exactly 4 (or 2) bytes are touched per field whatever
 * sizeof(long) is, and the data needs no particular alignment.
 */
static void little_endian_to_native (void *data, const char *format)
{
    /* The first byte of the value 1 is zero only on a big-endian host. */
    static const unsigned short probe = 1;
    const int big_endian = (*(const unsigned char *) &probe == 0);
    unsigned char *cp = (unsigned char *) data;
    unsigned char tmp;

    while (*format) {
        switch (*format) {
            case 'L':
                if (big_endian) {
                    tmp = cp [0]; cp [0] = cp [3]; cp [3] = tmp;
                    tmp = cp [1]; cp [1] = cp [2]; cp [2] = tmp;
                }

                cp += 4;
                break;

            case 'S':
                if (big_endian) {
                    tmp = cp [0]; cp [0] = cp [1]; cp [1] = tmp;
                }

                cp += 2;
                break;

            default:
                if (*format >= '0' && *format <= '9')
                    cp += *format - '0';

                break;
        }

        format++;
    }
}
/* Decode a 32-bit little-endian value from four raw bytes of tag data.
   Working byte by byte avoids reading the (unaligned) item fields through
   a long pointer. */
static unsigned long apetag_get_le32 (const unsigned char *p)
{
    return (unsigned long) p [0] |
           ((unsigned long) p [1] << 8) |
           ((unsigned long) p [2] << 16) |
           ((unsigned long) p [3] << 24);
}

/*
 * Attempt to obtain the named string-type item from the specified APEv2 tag.
 * The tag value will be copied to "value" (including an appended terminating
 * NUL) and the length of the string (including the NUL) will be returned.
 * If the data will not fit in the specified "size" then it will be truncated
 * early (but still terminated). If the specified item is not found then 0 is
 * returned and written to the first character of "value". If "value" is
 * passed in as NULL, then the specified size is ignored and the actual size
 * required to store the value is returned. If "value" is not NULL but
 * "size" is zero or negative, nothing is written and 0 is returned.
 *
 * Note that this function does not work on binary tag data; only UTF-8
 * encoded strings. However, numeric data (like ReplayGain) is usually stored
 * as strings.
 *
 * Also, APEv2 tags may have multiple values for a given item and these will
 * all be copied to "value" with NUL separators (this is why the total data
 * size is returned). Of course, it is possible to ignore any additional
 * values by simply using up to the first NUL.
 *
 * Item names are compared case-insensitively. Every length taken from the
 * tag is checked against the end of the loaded data before it is used; the
 * search simply stops (returning 0) at the first item that does not fit.
 */
static int get_apetag_item (struct apetag *tag,
                            const char *item,
                            char *value,
                            int size)
{
    const unsigned char *p, *end;
    long data_len, i;

    if (value && size > 0)
        *value = 0;

    /* load_apetag() clears id [0] whenever no valid tag is loaded. */
    if (tag->header.id [0] != 'A')
        return 0;

    /* header.length covers the items plus the 32 header bytes. */
    data_len = tag->header.length - APETAG_HEADER_LENGTH;

    if (data_len <= 0 || data_len > (long) sizeof (tag->data))
        return 0;

    p = (const unsigned char *) tag->data;
    end = p + data_len;

    for (i = 0; i < tag->header.item_count; ++i) {
        unsigned long vsize, flags;
        const unsigned char *key_end;
        long isize;

        /* Each item is: value size (4), flags (4), NUL-terminated key,
           value.  At least the two words and a key terminator must fit. */
        if (end - p < 8 + 1)
            break;

        vsize = apetag_get_le32 (p);
        flags = apetag_get_le32 (p + 4);
        p += 8;

        /* Find the key terminator without running past the tag data. */
        key_end = memchr (p, 0, end - p);

        if (!key_end)
            break;

        isize = key_end - p;

        /* The value must fit in what is left after the key; this also
           rejects sizes with the top bit set. */
        if (vsize > (unsigned long) (end - key_end - 1))
            break;

        /* Items with either of flag bits 1-2 set are not accepted. */
        if (isize && vsize && !strcasecmp (item, (const char *) p) &&
            !(flags & 6)) {
            int count = (int) vsize;

            if (value) {
                if (size <= 0)
                    return 0;

                if (count + 1 > size)
                    count = size - 1;

                memcpy (value, key_end + 1, count);
                value [count] = 0;
            }

            return count + 1;
        }

        p = key_end + 1 + vsize;
    }

    return 0;
}
/*
* Attempt to load an APEv2 tag from the specified file into the specified
* structure. If the APEv2 tag will not fit into the predefined data size,
* then the tag is not loaded. A return value of TRUE indicates success.
*/
static int load_apetag (int fd, struct apetag *tag)
{
if (lseek (fd, -APETAG_HEADER_LENGTH, SEEK_END) == -1 ||
read (fd, &tag->header, APETAG_HEADER_LENGTH) != APETAG_HEADER_LENGTH ||
strncmp (tag->header.id, "APETAGEX", 8)) {
tag->header.id [0] = 0;
return false;
}
little_endian_to_native (&tag->header, APETAG_HEADER_FORMAT);
if (tag->header.version == 2000 && tag->header.item_count &&
tag->header.length > APETAG_HEADER_LENGTH &&
tag->header.length < APETAG_DATA_LIMIT) {
int data_size = tag->header.length - APETAG_HEADER_LENGTH;
if (lseek (fd, -tag->header.length, SEEK_END) == -1 ||
read (fd, tag->data, data_size) != data_size) {
tag->header.id [0] = 0;
return false;
}
else
return true;
}
tag->header.id [0] = 0;
return false;
}
/*
 * This is a *VERY* simple-minded attempt to convert UTF-8 unicode character
 * strings to ANSI. Characters below 0x100 are mapped directly to an 8-bit
 * value and all the rest are turned into question marks. This can be done
 * "in-place" because the resulting string can only get smaller.
 *
 * pUTF8 - NUL-terminated UTF-8 string, overwritten with the converted text.
 *
 * Handling of malformed input:
 *  - a multi-byte sequence cut short by a lead byte or an ASCII character
 *    becomes a single '?', and the interrupting byte is then processed
 *    normally;
 *  - a sequence cut short by the end of the string is dropped (the text
 *    may simply have been truncated to fit a buffer);
 *  - continuation bytes with no sequence in progress are dropped.
 */
static void UTF8ToAnsi (unsigned char *pUTF8)
{
    unsigned char *pAnsi = pUTF8;
    unsigned int widechar = 0;
    int trail_bytes = 0;

    while (*pUTF8) {
        /* Fetch the byte before anything is written in this iteration. */
        unsigned char c = *pUTF8++;

        if ((c & 0xc0) == 0x80) {           /* continuation byte */
            if (trail_bytes) {
                widechar = (widechar << 6) | (c & 0x3f);

                /* Pin anything outside the 8-bit range at 0x100. Long
                   sequences then cannot wrap the accumulator back into
                   the range that is copied through. */
                if (widechar > 0xff)
                    widechar = 0x100;

                if (!--trail_bytes)
                    *pAnsi++ = widechar < 0x100 ? widechar : '?';
            }

            continue;
        }

        if (trail_bytes) {                  /* previous sequence cut short */
            trail_bytes = 0;
            *pAnsi++ = '?';
        }

        if (c & 0x80) {                     /* lead byte of a new sequence */
            /* Count the leading 1 bits and shift them out, which leaves
               the payload bits of the lead byte at the top of c. */
            while (c & 0x80) {
                trail_bytes++;
                c <<= 1;
            }

            widechar = c >> trail_bytes--;
        }
        else
            *pAnsi++ = c;
    }

    *pAnsi = 0;
}