arduino-audio-tools
All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends Modules Pages
HeaderParserMP3.h
1#pragma once
2#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
3
4namespace audio_tools {
5
18 // MPEG audio frame header
19 // variables are declared in their serialized order
20 // includes crc value
21 struct __attribute__((packed)) FrameHeader {
22 static const unsigned int SERIALIZED_SIZE = 4;
23
24 // bitmasks for frame header fields grouped by byte
25 static const unsigned char FRAMESYNC_FIRST_BYTEMASK = 0b11111111;
26
27 static const unsigned char FRAMESYNC_SECOND_BYTEMASK = 0b1110000;
28 static const unsigned char AUDIO_VERSION_MASK = 0b00011000;
29 static const unsigned char LAYER_DESCRIPTION_MASK = 0b00000110;
30 static const unsigned char PROTECTION_BIT_MASK = 0b00000001;
31
32 static const unsigned char BITRATE_INDEX_MASK = 0b11110000;
33 static const unsigned char SAMPLERATE_INDEX_MASK = 0b00001100;
34 static const unsigned char PADDING_BIT_MASK = 0b00000010;
35 static const unsigned char PRIVATE_BIT_MASK = 0b00000001;
36
37 static const unsigned char CHANNEL_MODE_MASK = 0b11000000;
38 static const unsigned char MODE_EXTENTION_MASK = 0b00110000;
39 static const unsigned char COPYRIGHT_BIT_MASK = 0b00001000;
40 static const unsigned char ORIGINAL_BIT_MASK = 0b00000100;
41 static const unsigned char EMPHASIS_MASK = 0b00000011;
42
43 char FrameSyncByte;
44 bool FrameSyncBits : 3;
45
46 // indicates MPEG standard version
47 enum class AudioVersionID : unsigned {
48 MPEG_2_5 = 0b00,
49 INVALID = 0b01, // reserved
50 MPEG_2 = 0b10,
51 MPEG_1 = 0b11,
52 } AudioVersion : 2;
53
54 // indicates which audio layer of the MPEG standard
55 enum class LayerID : unsigned {
56 INVALID = 0b00, // reserved
57 LAYER_3 = 0b01,
58 LAYER_2 = 0b10,
59 LAYER_1 = 0b11,
60 } Layer : 2;
61
62 // indicates whether theres a 16 bit crc checksum following the header
63 bool Protection : 1;
64
65 // sample & bitrate indexes meaning differ depending on MPEG version
66 // use getBitRate() and GetSamplerate()
67 bool BitrateIndex : 4;
68 bool SampleRateIndex : 2;
69
70 // indicates whether the audio data is padded with 1 extra byte (slot)
71 bool Padding : 1;
72
73 // this is only informative
74 bool Private : 1;
75
76 // indicates channel mode
77 enum class ChannelModeID : unsigned {
78 STEREO = 0b00,
79 JOINT = 0b01, // joint stereo
80 DUAL = 0b10, // dual channel (2 mono channels)
81 SINGLE = 0b11, // single channel (mono)
82 } ChannelMode : 2;
83
84 // Only used in joint channel mode. Meaning differ depending on audio layer
85 // Use GetExtentionMode()
86 bool ExtentionMode : 2;
87
88 // indicates whether the audio is copyrighted
89 bool Copyright : 1;
90
91 // indicates whether the frame is located on the original media or a copy
92 bool Original : 1;
93
94 // indicates to the decoder that the file must be de-emphasized, ie the
95 // decoder must 're-equalize' the sound after a Dolby-like noise supression.
96 // It is rarely used.
97 enum class EmphasisID : unsigned {
98 NONE = 0b00,
99 MS_50_15 = 0b01,
100 INVALID = 0b10,
101 CCIT_J17 = 0b10,
102 } Emphasis : 2;
103
104 enum SpecialBitrate {
105 INVALID = -8000,
106 ANY = 0,
107 };
108
109 signed int getBitRate() const {
110 // version, layer, bit index
111 static signed char rateTable[4][4][16] = {
112 // version[00] = MPEG_2_5
113 {
114 // layer[00] = INVALID
115 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
116 // layer[01] = LAYER_3
117 {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
118 // layer[10] = LAYER_2
119 {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
120 // layer[11] = LAYER_1
121 {0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
122 },
123
124 // version[01] = INVALID
125 {
126 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
127 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
128 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
129 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
130 },
131
132 // version[10] = MPEG_2
133 {
134 // layer[00] = INVALID
135 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
136 // layer[01] = LAYER_3
137 {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
138 // layer[10] = LAYER_2
139 {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
140 // layer[11] = LAYER_1
141 {0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
142 },
143
144 // version[11] = MPEG_1
145 {
146 // layer[00] = INVALID
147 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
148 // layer[01] = LAYER_3
149 {0, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, -1},
150 // layer[10] = LAYER_2
151 {0, 4, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, -1},
152 // layer[11] = LAYER_1
153 {0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, -1},
154 },
155 };
156 signed char rate_byte = rateTable[AudioVersion][Layer][BitrateIndex];
157 if (rate_byte == -1) {
158 LOGE("Unsupported bitrate");
159 return 0;
160 }
161 return rate_byte * 8000;
162 }
163
164 enum SpecialSampleRate {
165 RESERVED = 0,
166 };
167
168 unsigned short getSampleRate() const {
169 // version, sample rate index
170 static unsigned short rateTable[4][4] = {
171 // version[00] = MPEG_2_5
172 {11025, 12000, 8000, 0},
173 // version[01] = INVALID
174 {0, 0, 0, 0},
175 // version[10] = MPEG_2
176 {22050, 24000, 16000, 0},
177 // version[11] = MPEG_1
178 {44100, 48000, 32000, 0},
179 };
180
181 return rateTable[AudioVersion][SampleRateIndex];
182 }
183
184 int getFrameLength() {
185 int sample_rate = getSampleRate();
186 if (sample_rate == 0) return 0;
187 return int((144 * getBitRate() / sample_rate) + Padding);
188 }
189 };
190
191 public:
193 bool isValid(const uint8_t* data, int len) {
194 memset(&header, 0, sizeof(header));
195
196 // if we start with ID3 -> valid mp3
197 if (memcmp(data, "ID3", 3) == 0) {
198 LOGI("ID3 found");
199 return true;
200 }
201
202 int sync_pos = seekFrameSync(data, len);
203 if (sync_pos == -1) {
204 LOGE("Could not find FrameSync");
205 return false;
206 }
207
208 // xing header -> valid mp3
209 if (sync_pos >= 0 && contains(data, "Xing", len)) {
210 LOGI("Xing found");
211 return true;
212 }
213
214 // xing header -> valid mp3
215 if (sync_pos >= 0 && contains(data, "Info", len)) {
216 LOGI("Xing Info found");
217 return true;
218 }
219
220 // find valid segement in available data
221 bool is_valid_mp3 = false;
222 while (true) {
223 LOGI("checking header at %d", sync_pos);
224 int len_available = len - sync_pos;
225
226 // check if we have enough data for header
227 if (len_available < sizeof(header)) {
228 LOGE("Not enough data to determine mp3 header");
229 break;
230 }
231
232 readFrameHeader(data + sync_pos);
233 is_valid_mp3 = validate(data + sync_pos, len_available);
234
235 // check expected expected end of frame ( next frame)
236 int frame_len = getFrameLength();
237 if (is_valid_mp3 && frame_len > 0) {
238 int expected_next_frame = sync_pos + getFrameLength();
239 int pos = seekFrameSync(data + expected_next_frame,
240 len - expected_next_frame);
241 LOGI("- end frame found: %s", pos == 0 ? "yes" : "no");
242 if (pos != 0) is_valid_mp3 = false;
243 }
244
245 // find end sync
246 int pos = seekFrameSync(data + sync_pos + 2, len_available - 2);
247 // no more data to be validated
248 if (pos == -1) break;
249 // calculate new sync_pos
250 sync_pos = pos + sync_pos + 2;
251
252 // success and we found an end sync with a bit rate
253 if (is_valid_mp3 && getSampleRate() != 0) break;
254 }
255 if (is_valid_mp3) {
256 LOGI("-------------------");
257 LOGI("is mp3: %s", is_valid_mp3 ? "yes" : "no");
258 LOGI("frame size: %d", getFrameLength());
259 LOGI("sample rate: %u", getSampleRate());
260 // LOGI("bit rate index: %d", getFrameHeader().BitrateIndex);
261 LOGI("bit rate: %d", getBitRate());
262 LOGI("Padding: %d", getFrameHeader().Padding);
263 LOGI("Layer: %s (0x%x)", getLayerStr(), (int)getFrameHeader().Layer);
264 LOGI("Version: %s (0x%x)", getVersionStr(),
265 (int)getFrameHeader().AudioVersion);
266 LOGI("-------------------");
267 }
268 return is_valid_mp3;
269 }
270
271 uint16_t getSampleRate() const { return header.getSampleRate(); }
272
273 int getBitRate() const { return header.getBitRate(); }
274
276 int getFrameLength() { return header.getFrameLength(); }
277
280 size_t getPlayingTime(size_t fileSizeBytes) {
281 int bitrate = getBitRate();
282 if (bitrate == 0) return 0;
283 return fileSizeBytes / bitrate;
284 }
285
286 const char* getVersionStr() const {
287 return header.AudioVersion == FrameHeader::AudioVersionID::MPEG_1 ? "1"
288 : header.AudioVersion == FrameHeader::AudioVersionID::MPEG_2 ? "2"
289 : header.AudioVersion == FrameHeader::AudioVersionID::MPEG_2_5
290 ? "2.5"
291 : "INVALID";
292 }
293
294 const char* getLayerStr() const {
295 return header.Layer == FrameHeader::LayerID::LAYER_1 ? "1"
296 : header.Layer == FrameHeader::LayerID::LAYER_2 ? "2"
297 : header.Layer == FrameHeader::LayerID::LAYER_3 ? "3"
298 : "INVALID";
299 }
300
301 // provides the parsed MP3 frame header
302 FrameHeader getFrameHeader() { return header; }
303
/// Finds the first position where two consecutive bytes have all the bits of
/// synch (first byte) and syncl (second byte) set.
/// @param buf    data to be searched
/// @param nBytes number of bytes available in buf
/// @param synch  bits which must be set in the first byte
/// @param syncl  bits which must be set in the second byte
/// @return index of the first byte of the sync word or -1 if not found
int findSyncWord(const uint8_t* buf, size_t nBytes, uint8_t synch = 0xFF,
                 uint8_t syncl = 0xF0) {
  // a sync word needs 2 bytes; this also prevents nBytes - 1 from wrapping
  // around for an empty buffer
  if (buf == nullptr || nBytes < 2) return -1;
  for (size_t i = 0; i + 1 < nBytes; i++) {
    if ((buf[i] & synch) == synch && (buf[i + 1] & syncl) == syncl)
      return static_cast<int>(i);
  }
  return -1;
}
313
314 protected:
315 FrameHeader header;
316
317 bool validate(const uint8_t* data, size_t len) {
318 assert(header.FrameSyncByte = 0xFF);
319 // check end of frame: it must contains a sync word
320 return FrameReason::VALID == validateFrameHeader(header);
321 }
322
/// Checks if the binary data contains the indicated string.
/// @param data   data to be searched
/// @param toFind null terminated string to look for
/// @param len    number of bytes available in data
/// @return true if toFind was found anywhere in data
bool contains(const uint8_t* data, const char* toFind, size_t len) {
  if (data == nullptr || toFind == nullptr || len == 0) return false;
  size_t find_str_len = strlen(toFind);
  // also prevents the unsigned subtraction below from wrapping around
  if (find_str_len > len) return false;
  // the last possible start position is len - find_str_len (inclusive)
  size_t last_start = len - find_str_len;
  for (size_t j = 0; j <= last_start; j++) {
    if (memcmp(data + j, toFind, find_str_len) == 0) return true;
  }
  return false;
}
331
// Searches for the next frame sync: a byte with all bits set which is
// followed by a byte that has its 3 most significant bits set (a continuous
// run of 11 set bits).
// Returns the index of the byte with all bits set or -1 if no frame sync was
// found in the first len bytes.
int seekFrameSync(const uint8_t* str, size_t len) {
  // a frame sync needs 2 bytes; this also prevents len - 1 from wrapping
  // around for an empty buffer
  if (str == nullptr || len < 2) return -1;

  for (size_t j = 0; j + 1 < len; j++) {
    // skip bytes until a byte with all bits set is encountered
    if (str[j] != 0b11111111) continue;

    // if the next byte does not have its 3 most significant bits set this is
    // not a frame sync
    if ((str[j + 1] & 0b11100000) != 0b11100000) continue;

    return static_cast<int>(j);
  }

  return -1;
}
353
354 void readFrameHeader(const uint8_t* data) {
355 assert(data[0] == 0xFF);
356 assert((data[1] & 0b11100000) == 0b11100000);
357
358 memcpy(&header, data, sizeof(header));
359
360 LOGI("- sample rate: %u", getSampleRate());
361 LOGI("- bit rate: %d", getBitRate());
362 }
363
/// Result of the frame header validation: VALID or the reason for rejection.
enum class FrameReason {
  VALID = 0,
  INVALID_BITRATE_FOR_VERSION = 1,
  INVALID_SAMPLERATE_FOR_VERSION = 2,
  INVALID_MPEG_VERSION = 3,
  INVALID_LAYER = 4,
  INVALID_LAYER_II_BITRATE_AND_MODE = 5,
  INVALID_EMPHASIS = 6,
  INVALID_CRC = 7,
};
374
375 FrameReason validateFrameHeader(const FrameHeader& header) {
376 if (header.AudioVersion == FrameHeader::AudioVersionID::INVALID) {
377 LOGI("invalid mpeg version");
378 return FrameReason::INVALID_MPEG_VERSION;
379 }
380
381 if (header.Layer == FrameHeader::LayerID::INVALID) {
382 LOGI("invalid layer");
383 return FrameReason::INVALID_LAYER;
384 }
385
386 if (header.getBitRate() == FrameHeader::SpecialBitrate::INVALID) {
387 LOGI("invalid bitrate");
388 return FrameReason::INVALID_BITRATE_FOR_VERSION;
389 }
390
391 if (header.getSampleRate() == FrameHeader::SpecialSampleRate::RESERVED) {
392 LOGI("invalid samplerate");
393 return FrameReason::INVALID_SAMPLERATE_FOR_VERSION;
394 }
395
396 // For Layer II there are some combinations of bitrate and mode which are
397 // not allowed
398 if (header.Layer == FrameHeader::LayerID::LAYER_2) {
399 if (header.ChannelMode == FrameHeader::ChannelModeID::SINGLE) {
400 if (header.getBitRate() >= 224000) {
401 LOGI("invalid bitrate >224000");
402 return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
403 }
404 } else {
405 if (header.getBitRate() >= 32000 && header.getBitRate() <= 56000) {
406 LOGI("invalid bitrate >32000");
407 return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
408 }
409
410 if (header.getBitRate() == 80000) {
411 LOGI("invalid bitrate >80000");
412 return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
413 }
414 }
415 }
416
417 if (header.Emphasis == FrameHeader::EmphasisID::INVALID) {
418 LOGI("invalid Emphasis");
419 return FrameReason::INVALID_EMPHASIS;
420 }
421
422 return FrameReason::VALID;
423 }
424};
425
426} // namespace audio_tools
MP3 header parser to check if the data is a valid mp3 and to extract some relevant audio information....
Definition HeaderParserMP3.h:17
size_t getPlayingTime(size_t fileSizeBytes)
Definition HeaderParserMP3.h:280
int getFrameLength()
Determines the frame length.
Definition HeaderParserMP3.h:276
int findSyncWord(const uint8_t *buf, size_t nBytes, uint8_t synch=0xFF, uint8_t syncl=0xF0)
Finds the mp3/aac sync word.
Definition HeaderParserMP3.h:305
bool isValid(const uint8_t *data, int len)
parses the header string and returns true if this is a valid mp3 file
Definition HeaderParserMP3.h:193
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition AudioCodecsBase.h:10