rockbox/lib/rbcodec/metadata/mp4.c
Solomon Peachy bdc3dd4fca MP4: append mdat sections together in metadata parser
mp4 files can have multiple 'mdat' chunks.  This is common for
audiobooks, where there is often a secondary mdat containing the
chapter names, but it's also legal to have multiple mdat chunks
for a single logical "track"

This confuses the mp4 metadata parser, which assumes there is
only a single mdat, and always uses the last mdat seen to
determine the "filesize" of the data we're trying to decode.

Work around this by appending each mdat's size to result in the final
"filesize"

Change-Id: I3e7a7efb0f05ef965e8d77f79e450c957524a480
2021-02-19 13:35:41 +00:00

843 lines
25 KiB
C

/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2005 Magnus Holmgren
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <inttypes.h>
#include "platform.h"
#include "errno.h"
#include "metadata.h"
#include "metadata_common.h"
#include "metadata_parsers.h"
#include "logf.h"
#include "debug.h"
#include "replaygain.h"
#ifdef DEBUGF
#undef DEBUGF
#define DEBUGF(...)
#endif
#define MP4_3gp6 FOURCC('3', 'g', 'p', '6')
#define MP4_aART FOURCC('a', 'A', 'R', 'T')
#define MP4_alac FOURCC('a', 'l', 'a', 'c')
#define MP4_calb FOURCC(0xa9, 'a', 'l', 'b')
#define MP4_cART FOURCC(0xa9, 'A', 'R', 'T')
#define MP4_cgrp FOURCC(0xa9, 'g', 'r', 'p')
#define MP4_cgen FOURCC(0xa9, 'g', 'e', 'n')
#define MP4_chpl FOURCC('c', 'h', 'p', 'l')
#define MP4_cnam FOURCC(0xa9, 'n', 'a', 'm')
#define MP4_cwrt FOURCC(0xa9, 'w', 'r', 't')
#define MP4_ccmt FOURCC(0xa9, 'c', 'm', 't')
#define MP4_cday FOURCC(0xa9, 'd', 'a', 'y')
#define MP4_covr FOURCC('c', 'o', 'v', 'r')
#define MP4_disk FOURCC('d', 'i', 's', 'k')
#define MP4_esds FOURCC('e', 's', 'd', 's')
#define MP4_ftyp FOURCC('f', 't', 'y', 'p')
#define MP4_gnre FOURCC('g', 'n', 'r', 'e')
#define MP4_hdlr FOURCC('h', 'd', 'l', 'r')
#define MP4_ilst FOURCC('i', 'l', 's', 't')
#define MP4_isom FOURCC('i', 's', 'o', 'm')
#define MP4_M4A FOURCC('M', '4', 'A', ' ')
#define MP4_m4a FOURCC('m', '4', 'a', ' ') /*technically its "M4A "*/
#define MP4_M4B FOURCC('M', '4', 'B', ' ') /*but files exist with lower case*/
#define MP4_mdat FOURCC('m', 'd', 'a', 't')
#define MP4_mdia FOURCC('m', 'd', 'i', 'a')
#define MP4_mdir FOURCC('m', 'd', 'i', 'r')
#define MP4_meta FOURCC('m', 'e', 't', 'a')
#define MP4_minf FOURCC('m', 'i', 'n', 'f')
#define MP4_moov FOURCC('m', 'o', 'o', 'v')
#define MP4_mp4a FOURCC('m', 'p', '4', 'a')
#define MP4_mp42 FOURCC('m', 'p', '4', '2')
#define MP4_qt FOURCC('q', 't', ' ', ' ')
#define MP4_soun FOURCC('s', 'o', 'u', 'n')
#define MP4_stbl FOURCC('s', 't', 'b', 'l')
#define MP4_stsd FOURCC('s', 't', 's', 'd')
#define MP4_stts FOURCC('s', 't', 't', 's')
#define MP4_trak FOURCC('t', 'r', 'a', 'k')
#define MP4_trkn FOURCC('t', 'r', 'k', 'n')
#define MP4_udta FOURCC('u', 'd', 't', 'a')
#define MP4_extra FOURCC('-', '-', '-', '-')
/* Read the tag data from an MP4 file, storing up to buffer_size bytes in
* buffer.
*/
static unsigned long read_mp4_tag(int fd, unsigned int size_left, char* buffer,
unsigned int buffer_left)
{
unsigned int bytes_read = 0;
if (buffer_left == 0)
{
lseek(fd, size_left, SEEK_CUR); /* Skip everything */
}
else
{
/* Skip the data tag header - maybe we should parse it properly? */
lseek(fd, 16, SEEK_CUR);
size_left -= 16;
if (size_left > buffer_left)
{
read(fd, buffer, buffer_left);
lseek(fd, size_left - buffer_left, SEEK_CUR);
bytes_read = buffer_left;
}
else
{
read(fd, buffer, size_left);
bytes_read = size_left;
}
}
return bytes_read;
}
/* Read a string tag from an MP4 file */
static unsigned int read_mp4_tag_string(int fd, int size_left, char** buffer,
unsigned int* buffer_left, char** dest)
{
unsigned int bytes_read = read_mp4_tag(fd, size_left, *buffer,
*buffer_left > 0 ? *buffer_left - 1 : 0);
unsigned int length = 0;
if (bytes_read)
{
/* Do not overwrite already available metadata. Especially when reading
* tags with e.g. multiple genres / artists. This way only the first
* of multiple entries is used, all following are dropped. */
if (*dest == NULL)
{
(*buffer)[bytes_read] = 0; /* zero-terminate for correct strlen().*/
length = strlen(*buffer) + 1;
length = MIN(length, ID3V2_MAX_ITEM_SIZE); /* Limit item size. */
*dest = *buffer;
(*buffer)[length-1] = 0; /* zero-terminate buffer. */
*buffer_left -= length;
*buffer += length;
}
}
else
{
*dest = NULL;
}
return length;
}
static unsigned int read_mp4_atom(int fd, uint32_t* size,
uint32_t* type, uint32_t size_left)
{
read_uint32be(fd, size);
read_uint32be(fd, type);
if (*size == 1)
{
/* FAT32 doesn't support files this big, so something seems to
* be wrong. (64-bit sizes should only be used when required.)
*/
errno = EFBIG;
*type = 0;
return 0;
}
if (*size > 0)
{
if (*size > size_left)
{
size_left = 0;
}
else
{
size_left -= *size;
}
*size -= 8;
}
else
{
*size = size_left;
size_left = 0;
}
return size_left;
}
static unsigned int read_mp4_length(int fd, uint32_t* size)
{
unsigned int length = 0;
int bytes = 0;
unsigned char c;
do
{
read(fd, &c, 1);
bytes++;
(*size)--;
length = (length << 7) | (c & 0x7F);
}
while ((c & 0x80) && (bytes < 4) && (*size > 0));
return length;
}
static bool read_mp4_esds(int fd, struct mp3entry* id3, uint32_t* size)
{
unsigned char buf[8];
bool sbr = false;
lseek(fd, 4, SEEK_CUR); /* Version and flags. */
read(fd, buf, 1); /* Verify ES_DescrTag. */
*size -= 5;
if (*buf == 3)
{
/* read length */
if (read_mp4_length(fd, size) < 20)
{
return sbr;
}
lseek(fd, 3, SEEK_CUR);
*size -= 3;
}
else
{
lseek(fd, 2, SEEK_CUR);
*size -= 2;
}
read(fd, buf, 1); /* Verify DecoderConfigDescrTab. */
*size -= 1;
if (*buf != 4)
{
return sbr;
}
if (read_mp4_length(fd, size) < 13)
{
return sbr;
}
lseek(fd, 13, SEEK_CUR); /* Skip audio type, bit rates, etc. */
read(fd, buf, 1);
*size -= 14;
if (*buf != 5) /* Verify DecSpecificInfoTag. */
{
return sbr;
}
{
static const int sample_rates[] =
{
96000, 88200, 64000, 48000, 44100, 32000,
24000, 22050, 16000, 12000, 11025, 8000
};
unsigned long bits;
unsigned int length;
unsigned int index;
unsigned int type;
/* Read the (leading part of the) decoder config. */
length = read_mp4_length(fd, size);
length = MIN(length, *size);
length = MIN(length, sizeof(buf));
memset(buf, 0, sizeof(buf));
read(fd, buf, length);
*size -= length;
/* Maybe time to write a simple read_bits function... */
/* Decoder config format:
* Object type - 5 bits
* Frequency index - 4 bits
* Channel configuration - 4 bits
*/
bits = get_long_be(buf);
type = bits >> 27; /* Object type - 5 bits */
index = (bits >> 23) & 0xf; /* Frequency index - 4 bits */
if (index < (sizeof(sample_rates) / sizeof(*sample_rates)))
{
id3->frequency = sample_rates[index];
}
if (type == 5)
{
unsigned int old_index = index;
sbr = true;
index = (bits >> 15) & 0xf; /* Frequency index - 4 bits */
if (index == 15)
{
/* 17 bits read so far... */
bits = get_long_be(&buf[2]);
id3->frequency = (bits >> 7) & 0x00ffffff;
}
else if (index < (sizeof(sample_rates) / sizeof(*sample_rates)))
{
id3->frequency = sample_rates[index];
}
if (old_index == index)
{
/* Downsampled SBR */
id3->frequency *= 2;
}
}
/* Skip 13 bits from above, plus 3 bits, then read 11 bits */
else if ((length >= 4) && (((bits >> 5) & 0x7ff) == 0x2b7))
{
/* We found an extensionAudioObjectType */
type = bits & 0x1f; /* Object type - 5 bits*/
bits = get_long_be(&buf[4]);
if (type == 5)
{
sbr = bits >> 31;
if (sbr)
{
unsigned int old_index = index;
/* 1 bit read so far */
index = (bits >> 27) & 0xf; /* Frequency index - 4 bits */
if (index == 15)
{
/* 5 bits read so far */
id3->frequency = (bits >> 3) & 0x00ffffff;
}
else if (index < (sizeof(sample_rates) / sizeof(*sample_rates)))
{
id3->frequency = sample_rates[index];
}
if (old_index == index)
{
/* Downsampled SBR */
id3->frequency *= 2;
}
}
}
}
if (!sbr && (id3->frequency <= 24000) && (length <= 2))
{
/* Double the frequency for low-frequency files without a "long"
* DecSpecificConfig header. The file may or may not contain SBR,
* but here we guess it does if the header is short. This can
* fail on some files, but it's the best we can do, short of
* decoding (parts of) the file.
*/
id3->frequency *= 2;
sbr = true;
}
}
return sbr;
}
static bool read_mp4_tags(int fd, struct mp3entry* id3,
uint32_t size_left)
{
uint32_t size;
uint32_t type;
unsigned int buffer_left = sizeof(id3->id3v2buf) + sizeof(id3->id3v1buf);
char* buffer = id3->id3v2buf;
bool cwrt = false;
do
{
size_left = read_mp4_atom(fd, &size, &type, size_left);
/* DEBUGF("Tag atom: '%c%c%c%c' (%d bytes left)\n", type >> 24 & 0xff,
type >> 16 & 0xff, type >> 8 & 0xff, type & 0xff, size); */
switch (type)
{
case MP4_cnam:
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->title);
break;
case MP4_cART:
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->artist);
break;
case MP4_aART:
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->albumartist);
break;
case MP4_cgrp:
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->grouping);
break;
case MP4_calb:
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->album);
break;
case MP4_cwrt:
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->composer);
cwrt = false;
break;
case MP4_ccmt:
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->comment);
break;
case MP4_cday:
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->year_string);
/* Try to parse it as a year, for the benefit of the database.
*/
if(id3->year_string)
{
id3->year = atoi(id3->year_string);
if (id3->year < 1900)
{
id3->year = 0;
}
}
else
id3->year = 0;
break;
case MP4_gnre:
{
unsigned short genre;
read_mp4_tag(fd, size, (char*) &genre, sizeof(genre));
id3->genre_string = id3_get_num_genre(betoh16(genre) - 1);
}
break;
case MP4_cgen:
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->genre_string);
break;
case MP4_disk:
{
unsigned short n[2];
read_mp4_tag(fd, size, (char*) &n, sizeof(n));
id3->discnum = betoh16(n[1]);
}
break;
case MP4_trkn:
{
unsigned short n[2];
read_mp4_tag(fd, size, (char*) &n, sizeof(n));
id3->tracknum = betoh16(n[1]);
}
break;
#ifdef HAVE_ALBUMART
case MP4_covr:
{
int pos = lseek(fd, 0, SEEK_CUR) + 16;
read_mp4_tag(fd, size, buffer, 8);
id3->albumart.type = AA_TYPE_UNKNOWN;
if (memcmp(buffer, "\xff\xd8\xff\xe0", 4) == 0)
{
id3->albumart.type = AA_TYPE_JPG;
}
else if (memcmp(buffer, "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", 8) == 0)
{
id3->albumart.type = AA_TYPE_PNG;
}
if (id3->albumart.type != AA_TYPE_UNKNOWN)
{
id3->albumart.pos = pos;
id3->albumart.size = size - 16;
id3->has_embedded_albumart = true;
}
}
break;
#endif
case MP4_extra:
{
char tag_name[TAG_NAME_LENGTH];
uint32_t sub_size;
/* "mean" atom */
read_uint32be(fd, &sub_size);
size -= sub_size;
lseek(fd, sub_size - 4, SEEK_CUR);
/* "name" atom */
read_uint32be(fd, &sub_size);
size -= sub_size;
lseek(fd, 8, SEEK_CUR);
sub_size -= 12;
if (sub_size > sizeof(tag_name) - 1)
{
read(fd, tag_name, sizeof(tag_name) - 1);
lseek(fd, sub_size - (sizeof(tag_name) - 1), SEEK_CUR);
tag_name[sizeof(tag_name) - 1] = 0;
}
else
{
read(fd, tag_name, sub_size);
tag_name[sub_size] = 0;
}
if ((strcasecmp(tag_name, "composer") == 0) && !cwrt)
{
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->composer);
}
else if (strcasecmp(tag_name, "iTunSMPB") == 0)
{
char value[TAG_VALUE_LENGTH];
char* value_p = value;
char* any;
unsigned int length = sizeof(value);
read_mp4_tag_string(fd, size, &value_p, &length, &any);
id3->lead_trim = get_itunes_int32(value, 1);
id3->tail_trim = get_itunes_int32(value, 2);
DEBUGF("AAC: lead_trim %d, tail_trim %d\n",
id3->lead_trim, id3->tail_trim);
}
else if (strcasecmp(tag_name, "musicbrainz track id") == 0)
{
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->mb_track_id);
}
else if ((strcasecmp(tag_name, "album artist") == 0))
{
read_mp4_tag_string(fd, size, &buffer, &buffer_left,
&id3->albumartist);
}
else
{
char* any = NULL;
unsigned int length = read_mp4_tag_string(fd, size,
&buffer, &buffer_left, &any);
if (length > 0)
{
/* Re-use the read buffer as the dest buffer... */
buffer -= length;
buffer_left += length;
parse_replaygain(tag_name, buffer, id3);
}
}
}
break;
default:
lseek(fd, size, SEEK_CUR);
break;
}
}
while ((size_left > 0) && (errno == 0));
return true;
}
static bool read_mp4_container(int fd, struct mp3entry* id3,
uint32_t size_left)
{
uint32_t size = 0;
uint32_t type = 0;
uint32_t handler = 0;
bool rc = true;
bool done = false;
do
{
size_left = read_mp4_atom(fd, &size, &type, size_left);
/* DEBUGF("Atom: '%c%c%c%c' (0x%08lx, %lu bytes left)\n",
(int) ((type >> 24) & 0xff), (int) ((type >> 16) & 0xff),
(int) ((type >> 8) & 0xff), (int) (type & 0xff),
type, size); */
switch (type)
{
case MP4_ftyp:
{
uint32_t id;
read_uint32be(fd, &id);
size -= 4;
if ((id != MP4_M4A) && (id != MP4_M4B) && (id != MP4_mp42)
&& (id != MP4_qt) && (id != MP4_3gp6) && (id != MP4_m4a)
&& (id != MP4_isom))
{
DEBUGF("Unknown MP4 file type: '%c%c%c%c'\n",
(int)(id >> 24 & 0xff), (int)(id >> 16 & 0xff),
(int)(id >> 8 & 0xff), (int)(id & 0xff));
return false;
}
}
break;
case MP4_meta:
lseek(fd, 4, SEEK_CUR); /* Skip version */
size -= 4;
/* Fall through */
case MP4_moov:
case MP4_udta:
case MP4_mdia:
case MP4_stbl:
case MP4_trak:
rc = read_mp4_container(fd, id3, size);
size = 0;
break;
case MP4_ilst:
/* We need at least a size of 8 to read the next atom. */
if (handler == MP4_mdir && size>8)
{
rc = read_mp4_tags(fd, id3, size);
size = 0;
}
break;
case MP4_minf:
if (handler == MP4_soun)
{
rc = read_mp4_container(fd, id3, size);
size = 0;
}
break;
case MP4_stsd:
lseek(fd, 8, SEEK_CUR);
size -= 8;
rc = read_mp4_container(fd, id3, size);
size = 0;
break;
case MP4_hdlr:
lseek(fd, 8, SEEK_CUR);
read_uint32be(fd, &handler);
size -= 12;
/* DEBUGF(" Handler '%c%c%c%c'\n", handler >> 24 & 0xff,
handler >> 16 & 0xff, handler >> 8 & 0xff,handler & 0xff); */
break;
case MP4_stts:
{
uint32_t entries;
unsigned int i;
/* Reset to false. */
id3->needs_upsampling_correction = false;
lseek(fd, 4, SEEK_CUR);
read_uint32be(fd, &entries);
id3->samples = 0;
for (i = 0; i < entries; i++)
{
uint32_t n;
uint32_t l;
read_uint32be(fd, &n);
read_uint32be(fd, &l);
/* Some AAC file use HE profile. In this case the number
* of output samples is doubled to a maximum of 2048
* samples per frame. This means that files which already
* report a frame size of 2048 in their header will not
* need any further special handling. */
if (id3->codectype==AFMT_MP4_AAC_HE && l<=1024)
{
id3->samples += n * l * 2;
id3->needs_upsampling_correction = true;
}
else
{
id3->samples += n * l;
}
}
size = 0;
}
break;
case MP4_mp4a:
{
uint32_t subsize;
uint32_t subtype;
/* Move to the next expected mp4 atom. */
lseek(fd, 28, SEEK_CUR);
read_mp4_atom(fd, &subsize, &subtype, size);
size -= 36;
if (subtype == MP4_esds)
{
/* Read esds metadata and return if AAC-HE/SBR is used. */
if (read_mp4_esds(fd, id3, &size))
id3->codectype = AFMT_MP4_AAC_HE;
else
id3->codectype = AFMT_MP4_AAC;
}
}
break;
case MP4_alac:
{
uint32_t frequency;
uint32_t subsize;
uint32_t subtype;
/* Move to the next expected mp4 atom. */
lseek(fd, 28, SEEK_CUR);
read_mp4_atom(fd, &subsize, &subtype, size);
size -= 36;
#if 0
/* We might need to parse for the alac metadata atom. */
while (!((subsize==28) && (subtype==MP4_alac)) && (size>0))
{
lseek(fd, -7, SEEK_CUR);
read_mp4_atom(fd, &subsize, &subtype, size);
size -= 1;
errno = 0; /* will most likely be set while parsing */
}
#endif
if (subtype == MP4_alac)
{
lseek(fd, 24, SEEK_CUR);
read_uint32be(fd, &frequency);
size -= 28;
id3->frequency = frequency;
id3->codectype = AFMT_MP4_ALAC;
}
}
break;
case MP4_mdat:
/* Some AAC files appear to contain additional empty mdat chunks.
Ignore them. */
if(size == 0)
break;
/* mdat chunks accumulate! */
id3->filesize += size;
if(id3->samples > 0) {
/* We've already seen the moov chunk. */
done = true;
}
break;
case MP4_chpl:
{
/* ADDME: add support for real chapters. Right now it's only
* used for Nero's gapless hack */
uint8_t chapters;
uint64_t timestamp;
lseek(fd, 8, SEEK_CUR);
read_uint8(fd, &chapters);
size -= 9;
/* the first chapter will be used as the lead_trim */
if (chapters > 0) {
read_uint64be(fd, &timestamp);
id3->lead_trim = (timestamp * id3->frequency) / 10000000;
size -= 8;
}
}
break;
default:
break;
}
/* Skip final seek. */
if (!done)
{
lseek(fd, size, SEEK_CUR);
}
} while (rc && (size_left > 0) && (errno == 0) && !done);
return rc;
}
bool get_mp4_metadata(int fd, struct mp3entry* id3)
{
id3->codectype = AFMT_UNKNOWN;
id3->filesize = 0;
errno = 0;
if (read_mp4_container(fd, id3, filesize(fd)) && (errno == 0)
&& (id3->samples > 0) && (id3->frequency > 0)
&& (id3->filesize > 0))
{
if (id3->codectype == AFMT_UNKNOWN)
{
logf("Not an ALAC or AAC file");
return false;
}
id3->length = ((int64_t) id3->samples * 1000) / id3->frequency;
id3->vbr = true; /* ALAC is native VBR, AAC very unlikely is CBR. */
if (id3->length <= 0)
{
logf("mp4 length invalid!");
return false;
}
id3->bitrate = ((int64_t) id3->filesize * 8) / id3->length;
DEBUGF("MP4 bitrate %d, frequency %ld Hz, length %ld ms\n",
id3->bitrate, id3->frequency, id3->length);
}
else
{
logf("MP4 metadata error");
DEBUGF("MP4 metadata error. errno %d, samples %ld, frequency %ld, "
"filesize %ld\n", errno, id3->samples, id3->frequency,
id3->filesize);
return false;
}
return true;
}