7#include "AudioTools/AudioCodecs/MP4Parser.h"
8#include "AudioTools/CoreAudio/Buffers.h"
/// Identifies the audio codec of the demuxed stream. `Unknown` is the first
/// enumerator; the remaining values are AAC, ALAC and MP3.
27 enum class Codec { Unknown, AAC, ALAC, MP3 };
31 const char* mime =
nullptr;
50 uint8_t audioObjectType;
51 uint8_t samplingRateIndex;
52 uint8_t channelConfiguration;
56 bool parse(
const uint8_t* data,
size_t size) {
57 const uint8_t* ptr = data;
58 const uint8_t* end = data + size;
60 if (ptr + 4 > end)
return false;
63 if (ptr >= end || *ptr++ != 0x03)
return false;
64 size_t es_len = parse_descriptor_length(ptr, end);
65 if (ptr + es_len > end)
return false;
70 if (ptr >= end || *ptr++ != 0x04)
return false;
71 size_t dec_len = parse_descriptor_length(ptr, end);
72 if (ptr + dec_len > end)
return false;
77 if (ptr >= end || *ptr++ != 0x05)
return false;
78 size_t dsi_len = parse_descriptor_length(ptr, end);
79 if (ptr + dsi_len > end || dsi_len < 2)
return false;
81 uint8_t byte1 = ptr[0];
82 uint8_t byte2 = ptr[1];
84 audioObjectType = (byte1 >> 3) & 0x1F;
85 samplingRateIndex = ((byte1 & 0x07) << 1) | ((byte2 >> 7) & 0x01);
86 channelConfiguration = (byte2 >> 3) & 0x0F;
92 inline size_t parse_descriptor_length(
const uint8_t*& ptr,
95 for (
int i = 0; i < 4 && ptr < end; ++i) {
97 len = (len << 7) | (b & 0x7F);
98 if ((b & 0x80) == 0)
break;
/// Local shorthand for the codec enumeration declared in M4ACommonDemuxer.
112 using Codec = M4ACommonDemuxer::Codec;
/// Signature of the frame callback: it receives the frame by const reference
/// plus an untyped pointer, and returns nothing.
/// NOTE(review): the void* is presumably a caller-supplied reference object -
/// confirm against the call site.
114 using FrameCallback = std::function<void(
const Frame&,
void*)>;
127 p_chunk_offsets->
clear();
128 p_sample_sizes->
clear();
165 size_t write(
const uint8_t* data,
size_t len,
bool is_final) {
168 if (currentSize == 0) {
169 LOGE(
"No sample size defined: e.g. mdat before stsz!");
175 for (
int j = 0; j < len; j++) {
178 LOGI(
"Sample# %zu: size %zu bytes",
sampleIndex, currentSize);
185 LOGI(
"Reached end of box: %s write",
186 is_final ?
"final" :
"not final");
189 if (currentSize == 0) {
190 LOGE(
"No sample size defined, cannot write data");
203 return *p_sample_sizes;
225 p_chunk_offsets = &
buffer;
247 frame.codec = audio_config.
codec;
250 switch (audio_config.
codec) {
253 tmp.resize(size + 7);
255 audio_config.sampleRateIdx, audio_config.
channelCfg,
258 frame.data = tmp.data();
259 frame.size = size + 7;
260 frame.mime =
"audio/aac";
264 frame.mime =
"audio/alac";
267 frame.mime =
"audio/mpeg";
270 frame.mime =
nullptr;
303 LOGE(
"No callback defined for audio frame extraction");
311 if (
buffer.size() < newSize) {
321 static size_t last_index = -1;
322 static size_t last_size = -1;
335 if (p_sample_sizes->
read(nextSize)) {
337 last_size = nextSize;
352 int sampleRateIdx,
int channelCfg,
356 adts[2] = ((aacProfile - 1) << 6) | (sampleRateIdx << 2) |
357 ((channelCfg >> 2) & 0x1);
358 adts[3] = ((channelCfg & 0x3) << 6) | ((frameLen + 7) >> 11);
359 adts[4] = ((frameLen + 7) >> 3) & 0xFF;
360 adts[5] = (((frameLen + 7) & 0x7) << 5) | 0x1F;
/// Signature of the frame callback: it receives the frame by const reference
/// plus an untyped pointer named `ref`, and returns nothing.
/// NOTE(review): `ref` is presumably a caller-supplied reference object -
/// confirm against the call site.
365 using FrameCallback = std::function<void(
const Frame&,
void* ref)>;
/// Stores the given callback in `frame_callback`, replacing any previous one.
/// Virtual, so derived classes may override it.
/// @param cb callback to store (copied into the member).
374 virtual void setCallback(FrameCallback cb) { frame_callback = cb; }
394 audio_config.
codec = Codec::Unknown;
397 chunk_offsets_count = 0;
407 audio_config.aacProfile = profile;
408 audio_config.sampleRateIdx = srIdx;
/// Replaces the current audio configuration with a copy of `cfg`.
/// @param cfg configuration to assign to `audio_config`.
412 void setM4AAudioConfig(M4AAudioConfig cfg) { audio_config = cfg; }
/// Returns the current audio configuration by value (a copy of
/// `audio_config`).
414 M4AAudioConfig getM4AAudioConfig() {
return audio_config; }
416 void resize(
int size) {
418 if (
buffer.size() < size) {
/// Pure virtual hook that every concrete subclass must implement.
/// NOTE(review): presumably registers the box handlers on the parser -
/// confirm against the implementations.
433 virtual void setupParser() = 0;
436 FrameCallback frame_callback =
nullptr;
442 bool stsd_processed =
false;
446 uint32_t stsz_offset = 0;
447 uint32_t chunk_offsets_count = 0;
456 return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
459 static uint32_t
readU32(
const uint32_t num) {
460 uint8_t* p = (uint8_t*)#
461 return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
464 uint32_t readU32Buffer() {
465 uint32_t nextSize = 0;
479 if (
buffer ==
nullptr || type ==
nullptr)
return false;
480 bool result =
buffer[offset] == type[0] &&
buffer[offset + 1] == type[1] &&
481 buffer[offset + 2] == type[2] &&
482 buffer[offset + 3] == type[3];
487 LOGI(
"Box: %s, size: %u bytes", box.
type, (
unsigned)box.
available);
510 LOGI(
"onMp4a: %s, size: %zu bytes", box.
type, box.
data_size);
517 int sampleRateIdx = 4;
520 audio_config.
codec = Codec::AAC;
533 LOGI(
"onEsds: %s, size: %zu bytes", box.
type, box.
data_size);
537 LOGE(
"Failed to parse esds box");
541 "-> esds: AAC objectType: %u, samplingRateIdx: %u, "
543 esdsParser.audioObjectType, esdsParser.samplingRateIndex,
544 esdsParser.channelConfiguration);
545 setAACConfig(esdsParser.audioObjectType, esdsParser.samplingRateIndex,
546 esdsParser.channelConfiguration);
588 LOGI(
"onAlac: %s, size: %zu bytes", box.
type, box.
data_size);
589 audio_config.
codec = Codec::ALAC;
607 LOGI(
"onStsz #%u: %s, size: %u of %u bytes", (
unsigned) box.
seq, box.
type, (
unsigned) box.
available, (
unsigned) box.
data_size);
619 uint32_t sampleSize = readU32Buffer();
620 uint32_t sampleCount = readU32Buffer();
625 if (sampleSize != 0) {
634 assert(sampleSizes.
write(sampleSize));
678 const uint8_t* data = box.
data;
680 LOGI(
"===========================");
681 for (
size_t i = 0; i < len; i += 16) {
683 char ascii[17] = {0};
684 for (
size_t j = 0; j < 16 && i + j < len; ++j) {
685 sprintf(hex + j * 3,
"%02X ", data[i + j]);
686 ascii[j] = (data[i + j] >= 32 && data[i + j] < 127) ? data[i + j] :
'.';
689 LOGI(
"%04zx: %-48s |%s|", i, hex, ascii);
691 LOGI(
"===========================");
uint16_t stsz_sample_size_t
Sample size type optimized for microcontrollers.
Definition M4ACommonDemuxer.h:19