7#include "AudioTools/AudioCodecs/MP4Parser.h"
8#include "AudioTools/CoreAudio/Buffers.h"
/// Identifies the codec of an audio track. `Unknown` is the first enumerator
/// and therefore the value a value-initialized Codec holds.
enum class Codec {
  Unknown,
  AAC,
  ALAC,
  MP3
};
29 const char* mime =
nullptr;
48 uint8_t audioObjectType;
49 uint8_t samplingRateIndex;
50 uint8_t channelConfiguration;
54 bool parse(
const uint8_t* data,
size_t size) {
55 const uint8_t* ptr = data;
56 const uint8_t* end = data + size;
58 if (ptr + 4 > end)
return false;
61 if (ptr >= end || *ptr++ != 0x03)
return false;
62 size_t es_len = parse_descriptor_length(ptr, end);
63 if (ptr + es_len > end)
return false;
68 if (ptr >= end || *ptr++ != 0x04)
return false;
69 size_t dec_len = parse_descriptor_length(ptr, end);
70 if (ptr + dec_len > end)
return false;
75 if (ptr >= end || *ptr++ != 0x05)
return false;
76 size_t dsi_len = parse_descriptor_length(ptr, end);
77 if (ptr + dsi_len > end || dsi_len < 2)
return false;
79 uint8_t byte1 = ptr[0];
80 uint8_t byte2 = ptr[1];
82 audioObjectType = (byte1 >> 3) & 0x1F;
83 samplingRateIndex = ((byte1 & 0x07) << 1) | ((byte2 >> 7) & 0x01);
84 channelConfiguration = (byte2 >> 3) & 0x0F;
90 inline size_t parse_descriptor_length(
const uint8_t*& ptr,
93 for (
int i = 0; i < 4 && ptr < end; ++i) {
95 len = (len << 7) | (b & 0x7F);
96 if ((b & 0x80) == 0)
break;
/// Local shorthand for the codec enumeration declared in M4ACommonDemuxer.
110 using Codec = M4ACommonDemuxer::Codec;
/// Callable type taking a const reference to a Frame and an untyped pointer;
/// it returns nothing.
112 using FrameCallback = std::function<void(
const Frame&,
void*)>;
125 p_chunk_offsets->
clear();
126 p_sample_sizes->
clear();
163 size_t write(
const uint8_t* data,
size_t len,
bool is_final) {
166 if (currentSize == 0) {
167 LOGE(
"No sample size defined: e.g. mdat before stsz!");
173 for (
int j = 0; j < len; j++) {
176 LOGI(
"Sample# %zu: size %zu bytes",
sampleIndex, currentSize);
183 LOGI(
"Reached end of box: %s write",
184 is_final ?
"final" :
"not final");
187 if (currentSize == 0) {
188 LOGE(
"No sample size defined, cannot write data");
163 size_t write(
const uint8_t* data,
size_t len,
bool is_final) {
…}
201 return *p_sample_sizes;
223 p_chunk_offsets = &
buffer;
245 frame.codec = audio_config.
codec;
248 switch (audio_config.
codec) {
251 tmp.resize(size + 7);
253 audio_config.sampleRateIdx, audio_config.
channelCfg,
256 frame.data = tmp.data();
257 frame.size = size + 7;
258 frame.mime =
"audio/aac";
262 frame.mime =
"audio/alac";
265 frame.mime =
"audio/mpeg";
268 frame.mime =
nullptr;
301 LOGE(
"No callback defined for audio frame extraction");
309 if (
buffer.size() < newSize) {
319 static size_t last_index = -1;
320 static size_t last_size = -1;
333 if (p_sample_sizes->
read(nextSize)) {
335 last_size = nextSize;
350 int sampleRateIdx,
int channelCfg,
354 adts[2] = ((aacProfile - 1) << 6) | (sampleRateIdx << 2) |
355 ((channelCfg >> 2) & 0x1);
356 adts[3] = ((channelCfg & 0x3) << 6) | ((frameLen + 7) >> 11);
357 adts[4] = ((frameLen + 7) >> 3) & 0xFF;
358 adts[5] = (((frameLen + 7) & 0x7) << 5) | 0x1F;
/// Callable type taking a const reference to a Frame and an untyped pointer
/// (`ref`); it returns nothing. This is the type accepted by setCallback().
363 using FrameCallback = std::function<void(
const Frame&,
void* ref)>;
372 virtual void setCallback(FrameCallback cb) { frame_callback = cb; }
392 audio_config.
codec = Codec::Unknown;
395 chunk_offsets_count = 0;
405 audio_config.aacProfile = profile;
406 audio_config.sampleRateIdx = srIdx;
410 void setM4AAudioConfig(M4AAudioConfig cfg) { audio_config = cfg; }
412 M4AAudioConfig getM4AAudioConfig() {
return audio_config; }
414 void resize(
int size) {
416 if (
buffer.size() < size) {
/// Pure virtual hook: every concrete subclass has to provide an
/// implementation. NOTE(review): presumably registers the box handlers on the
/// MP4 parser -- confirm against the implementing classes.
431 virtual void setupParser() = 0;
434 FrameCallback frame_callback =
nullptr;
440 bool stsd_processed =
false;
444 uint32_t stsz_offset = 0;
445 uint32_t chunk_offsets_count = 0;
454 return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
/// Re-reads the storage bytes of a 32-bit value as a big-endian number.
///
/// The four bytes of @p num are taken in address order; the byte at the
/// lowest address becomes the most significant byte of the result. On a
/// little-endian host this is a byte swap, on a big-endian host the value is
/// returned unchanged.
///
/// @param num value whose in-memory bytes are reassembled
/// @return the bytes of @p num combined most-significant-first
static uint32_t readU32(const uint32_t num) {
  const uint8_t* p = reinterpret_cast<const uint8_t*>(&num);
  // Widen every byte before shifting: a uint8_t promotes to int, so shifting
  // it by 24 overflows the signed type for bytes >= 0x80, and shifting by 16
  // or more is undefined where int is only 16 bits wide.
  return (static_cast<uint32_t>(p[0]) << 24) |
         (static_cast<uint32_t>(p[1]) << 16) |
         (static_cast<uint32_t>(p[2]) << 8) |
         static_cast<uint32_t>(p[3]);
}
462 uint32_t readU32Buffer() {
463 uint32_t nextSize = 0;
477 if (
buffer ==
nullptr || type ==
nullptr)
return false;
478 bool result =
buffer[offset] == type[0] &&
buffer[offset + 1] == type[1] &&
479 buffer[offset + 2] == type[2] &&
480 buffer[offset + 3] == type[3];
485 LOGI(
"Box: %s, size: %u bytes", box.
type, (
unsigned)box.
available);
508 LOGI(
"onMp4a: %s, size: %zu bytes", box.
type, box.
data_size);
515 int sampleRateIdx = 4;
518 audio_config.
codec = Codec::AAC;
531 LOGI(
"onEsds: %s, size: %zu bytes", box.
type, box.
data_size);
535 LOGE(
"Failed to parse esds box");
539 "-> esds: AAC objectType: %u, samplingRateIdx: %u, "
541 esdsParser.audioObjectType, esdsParser.samplingRateIndex,
542 esdsParser.channelConfiguration);
543 setAACConfig(esdsParser.audioObjectType, esdsParser.samplingRateIndex,
544 esdsParser.channelConfiguration);
586 LOGI(
"onAlac: %s, size: %zu bytes", box.
type, box.
data_size);
587 audio_config.
codec = Codec::ALAC;
605 LOGI(
"onStsz #%u: %s, size: %u of %u bytes", (
unsigned) box.
seq, box.
type, (
unsigned) box.
available, (
unsigned) box.
data_size);
617 uint32_t sampleSize = readU32Buffer();
618 uint32_t sampleCount = readU32Buffer();
623 if (sampleSize != 0) {
632 assert(sampleSizes.
write(sampleSize));
676 const uint8_t* data = box.
data;
678 LOGI(
"===========================");
679 for (
size_t i = 0; i < len; i += 16) {
681 char ascii[17] = {0};
682 for (
size_t j = 0; j < 16 && i + j < len; ++j) {
683 sprintf(hex + j * 3,
"%02X ", data[i + j]);
684 ascii[j] = (data[i + j] >= 32 && data[i + j] < 127) ? data[i + j] :
'.';
687 LOGI(
"%04zx: %-48s |%s|", i, hex, ascii);
689 LOGI(
"===========================");