28 static const unsigned int SERIALIZED_SIZE = 4;
30 enum class MPEGVersionID :
unsigned {
37 enum class LayerID :
unsigned {
44 enum class ChannelModeID :
unsigned {
51 enum class EmphasisID :
unsigned {
// Sentinel bit-rate values.
// NOTE(review): -8000 equals a -1 rate-table entry multiplied by 8000 in
// getBitRate() - presumably intentional; confirm.
58 enum SpecialBitrate { INVALID_BITRATE = -8000, ANY = 0 };
// Sample-rate value that validateFrameHeader() compares getSampleRate()
// against in order to reject a header.
59 enum SpecialSampleRate { RESERVED = 0 };
// --- Parsed header fields (written by decode()) ---
// MPEG version id; first index into the bit-rate and sample-rate tables.
62 MPEGVersionID audioVersion = MPEGVersionID::INVALID;
// Layer id; second index into the bit-rate table.
63 LayerID layer = LayerID::INVALID;
// decode() sets this to true when the lowest bit of b1 is clear.
64 bool protection =
false;
// 4-bit index (decode() masks with 0x0F) into the bit-rate table.
65 uint8_t bitrateIndex = 0;
// 2-bit index (decode() masks with 0x03) into the sample-rate table.
66 uint8_t sampleRateIndex = 0;
// Lowest bit of b2.
68 bool isPrivate =
false;
// Two top bits of b3; getters report 1 channel for SINGLE, otherwise 2.
69 ChannelModeID channelMode = ChannelModeID::STEREO;
// Bits 5..4 of b3.
70 uint8_t extensionMode = 0;
// Bit 3 of b3.
71 bool copyright =
false;
// Bit 2 of b3.
72 bool original =
false;
// Two lowest bits of b3; validateFrameHeader() rejects EmphasisID::INVALID.
73 EmphasisID emphasis = EmphasisID::NONE;
/// Decodes the header fields from the bytes at `b` into `out`.
///
/// A null `b` returns false. The visible code tests for the frame sync
/// pattern (b[0] == 0xFF and the three top bits of b[1] set) before the
/// fields are extracted.
/// NOTE(review): the statements between the sync test and the field
/// extraction are not visible in this excerpt; b1, b2 and b3 are presumably
/// b[1], b[2] and b[3] - confirm. The final return is not visible either.
76 static bool decode(
const uint8_t* b, FrameHeader& out) {
77 if (b ==
nullptr)
return false;
// Frame sync test: 0xFF followed by a byte whose three top bits are all set.
78 if (!(b[0] == 0xFF && (b[1] & 0xE0) == 0xE0))
// b1, bits 4..3: MPEG version id.
85 out.audioVersion =
static_cast<MPEGVersionID
>((b1 >> 3) & 0x03);
// b1, bits 2..1: layer id.
86 out.layer =
static_cast<LayerID
>((b1 >> 1) & 0x03);
// b1, bit 0: the flag is inverted - a cleared bit yields protection == true.
87 out.protection = !(b1 & 0x01);
// b2, bits 7..4: bit-rate index (0..15).
89 out.bitrateIndex = (b2 >> 4) & 0x0F;
// b2, bits 3..2: sample-rate index (0..3).
90 out.sampleRateIndex = (b2 >> 2) & 0x03;
// b2, bit 1: padding flag.
91 out.padding = (b2 >> 1) & 0x01;
// b2, bit 0: private flag.
92 out.isPrivate = (b2 & 0x01) != 0;
// b3, bits 7..6: channel mode.
94 out.channelMode =
static_cast<ChannelModeID
>((b3 >> 6) & 0x03);
// b3, bits 5..4: mode extension.
95 out.extensionMode = (b3 >> 4) & 0x03;
// b3, bit 3: copyright flag.
96 out.copyright = (b3 >> 3) & 0x01;
// b3, bit 2: original flag.
97 out.original = (b3 >> 2) & 0x01;
// b3, bits 1..0: emphasis.
98 out.emphasis =
static_cast<EmphasisID
>(b3 & 0x03);
/// Looks up the bit rate for the current audioVersion, layer and
/// bitrateIndex.
///
/// Table entries are stored divided by 8000 so that they fit into a
/// signed char; the final return multiplies the entry by 8000. An entry of
/// -1 marks an unsupported combination and is logged.
/// NOTE(review): the statement(s) following the log call are not visible
/// in this excerpt - confirm what is returned for an unsupported entry.
102 signed int getBitRate()
const {
// Indexed as [audioVersion][layer][bitrateIndex].
104 static const signed char rateTable[4][4][16] = {
// First index 0: layer rows 0..3.
108 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
110 {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
112 {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
114 {0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
// First index 1: every entry is unsupported.
119 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
120 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
121 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
122 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
// First index 2: same values as first index 0.
128 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
130 {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
132 {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
134 {0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
// First index 3: layer rows 0..3.
140 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
142 {0, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, -1},
144 {0, 4, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, -1},
146 {0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, -1},
// The enum values are used directly as array indices.
149 signed char rate_byte =
150 rateTable[(int)audioVersion][(
int)layer][(int)bitrateIndex];
151 if (rate_byte == -1) {
152 LOGE(
"Unsupported bitrate");
// Scale the table entry back to bits per second.
155 return rate_byte * 8000;
/// Returns the sample-rate table entry selected by audioVersion and
/// sampleRateIndex.
///
/// A result of 0 is treated as "no usable rate" by the callers visible in
/// this file (getFrameLength() returns 0, validateFrameHeader() rejects
/// the header).
/// NOTE(review): the table is declared with four rows but only three are
/// visible in this excerpt - confirm the remaining row against the full
/// file.
158 unsigned short getSampleRate()
const {
// Indexed as [audioVersion][sampleRateIndex]; the last column is always 0.
160 static const unsigned short rateTable[4][4] = {
162 {11025, 12000, 8000, 0},
166 {22050, 24000, 16000, 0},
168 {44100, 48000, 32000, 0},
171 return rateTable[(int)audioVersion][(
int)sampleRateIndex];
/// Computes the frame length as value * bit rate / sample rate, plus 1
/// when the padding flag is set. Returns 0 when the sample rate is 0.
174 int getFrameLength()
const {
175 int sample_rate = getSampleRate();
// Bail out early: a zero sample rate would divide by zero below.
176 if (sample_rate == 0)
return 0;
// Coefficient: 144 for MPEG_1, 72 for every other version. The declaration
// that receives this expression (`value`) is not visible in this excerpt.
// NOTE(review): `layer` is not consulted here - confirm that one
// coefficient per version is intended for every layer.
178 (audioVersion == FrameHeader::MPEGVersionID::MPEG_1) ? 144 : 72;
// Integer arithmetic; the quotient is truncated before padding is added.
179 return int((value * getBitRate() / sample_rate) + (padding ? 1 : 0));
189 : p_output(&output), buffer_size(bufferSize) {}
191 void setOutput(
Print& output) { p_output = &output; }
193 void resize(
int size) { buffer_size = size; }
/// Feeds `len` bytes from `data` into the internal buffer.
/// NOTE(review): the remainder of the body (processing of the buffered
/// data and the return value) is not visible in this excerpt.
196 size_t write(
const uint8_t* data,
size_t len) {
// Grow the buffer lazily to the configured size on first use / after resize().
197 if (buffer.size() < buffer_size) buffer.
resize(buffer_size);
// Copy the input byte by byte into the buffer.
199 for (
int i = 0; i < len; i++) {
200 buffer.
write(data[i]);
210 if (p_output ==
nullptr)
return;
215 bool isValid() {
return last_frame_size > 0; }
219 if (data ==
nullptr || len < 10) {
220 LOGE(
"Invalid input data or too small");
224 header = FrameHeader{};
225 int valid_frames_found = 0;
226 int consecutive_frames = 0;
227 const int MIN_FRAMES_TO_VALIDATE =
229 const int MAX_SEARCH_DISTANCE =
233 if (len >= 10 && memcmp(data,
"ID3", 3) == 0) {
234 LOGI(
"ID3v2 tag found");
236 int id3_size = ((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) |
237 ((data[8] & 0x7F) << 7) | (data[9] & 0x7F);
238 int audio_start = 10 + id3_size;
239 if (audio_start < len) {
240 return isValid(data + audio_start, len - audio_start);
246 int sync_pos = seekFrameSync(data, min(len, MAX_SEARCH_DISTANCE));
247 if (sync_pos == -1) {
248 LOGE(
"No frame sync found in first %d bytes", MAX_SEARCH_DISTANCE);
253 if (contains(data + sync_pos,
"Xing", len - sync_pos) ||
254 contains(data + sync_pos,
"Info", len - sync_pos) ||
255 contains(data + sync_pos,
"VBRI", len - sync_pos)) {
256 LOGI(
"VBR header found (Xing/Info/VBRI)");
261 int current_pos = sync_pos;
262 FrameHeader first_header;
263 bool first_header_set =
false;
265 while (current_pos < len &&
266 (current_pos - sync_pos) < MAX_SEARCH_DISTANCE) {
267 int len_available = len - current_pos;
270 if (len_available < (
int)FrameHeader::SERIALIZED_SIZE) {
271 LOGD(
"Not enough data for header at position %d", current_pos);
276 FrameHeader temp_header;
277 if (!FrameHeader::decode(data + current_pos, temp_header) ||
278 validateFrameHeader(temp_header) != FrameReason::VALID) {
279 LOGD(
"Invalid frame header at position %d", current_pos);
280 consecutive_frames = 0;
283 seekFrameSync(data + current_pos + 1, len - current_pos - 1);
284 if (next_sync_off == -1)
break;
285 current_pos = current_pos + 1 + next_sync_off;
290 int frame_len = temp_header.getFrameLength();
291 if (frame_len <= 0 || frame_len > 4096) {
292 LOGD(
"Invalid frame length %d at position %d", frame_len, current_pos);
293 consecutive_frames = 0;
299 if (!first_header_set) {
300 first_header = temp_header;
301 first_header_set =
true;
302 header = temp_header;
307 if (temp_header.getSampleRate() == 0 ||
308 temp_header.getBitRate() <= 0) {
309 LOGD(
"Invalid audio parameters in frame at position %d",
311 first_header_set =
false;
312 consecutive_frames = 0;
318 int expected_frame_size =
319 (temp_header.audioVersion == FrameHeader::MPEGVersionID::MPEG_1)
320 ? (144 * temp_header.getBitRate() /
321 temp_header.getSampleRate())
322 : (72 * temp_header.getBitRate() /
323 temp_header.getSampleRate());
324 if (abs(frame_len - expected_frame_size) >
325 expected_frame_size * 0.1) {
326 LOGD(
"Frame length %d doesn't match expected %d for bitrate",
327 frame_len, expected_frame_size);
328 first_header_set =
false;
329 consecutive_frames = 0;
337 if (temp_header.audioVersion != first_header.audioVersion ||
338 temp_header.layer != first_header.layer ||
339 temp_header.getSampleRate() != first_header.getSampleRate()) {
340 LOGD(
"Frame parameters inconsistent at position %d", current_pos);
345 valid_frames_found++;
346 consecutive_frames++;
349 if (len_available < frame_len) {
350 LOGD(
"Incomplete frame at position %d (need %d, have %d)", current_pos,
351 frame_len, len_available);
356 int next_pos = current_pos + frame_len;
357 if (next_pos + 1 < len) {
358 if (seekFrameSync(data + next_pos, min(4, len - next_pos)) == 0) {
360 current_pos = next_pos;
363 LOGD(
"No sync at expected position %d", next_pos);
364 consecutive_frames = 0;
373 seekFrameSync(data + current_pos + 1, len - current_pos - 1);
374 if (next_sync == -1)
break;
375 current_pos = current_pos + 1 + next_sync;
379 bool is_valid_mp3 =
false;
383 is_valid_mp3 = (consecutive_frames >= MIN_FRAMES_TO_VALIDATE);
384 }
else if (len >= 1024) {
387 is_valid_mp3 = (consecutive_frames >= 2) ||
388 (valid_frames_found >= MIN_FRAMES_TO_VALIDATE);
392 is_valid_mp3 = (valid_frames_found >= 1) && first_header_set;
395 if (is_valid_mp3 && first_header_set) {
396 LOGI(
"-------------------");
397 LOGI(
"MP3 validation: VALID");
398 LOGI(
"Data size: %d bytes", len);
399 LOGI(
"Valid frames found: %d", valid_frames_found);
400 LOGI(
"Consecutive frames: %d", consecutive_frames);
402 LOGI(
"Validation mode: STRICT (large buffer)");
403 }
else if (len >= 1024) {
404 LOGI(
"Validation mode: MODERATE (1KB+ buffer)");
406 LOGI(
"Validation mode: LENIENT (small buffer)");
411 LOGI(
"Padding: %d", getFrameHeader().padding);
412 LOGI(
"Layer: %s (0x%x)",
getLayerStr(), (
int)getFrameHeader().layer);
414 (
int)getFrameHeader().audioVersion);
415 LOGI(
"-------------------");
417 LOGI(
"MP3 validation: INVALID (frames: %d, consecutive: %d, size: %d)",
418 valid_frames_found, consecutive_frames, len);
426 return frame_header_valid ? header.getSampleRate() : 0;
431 return frame_header_valid ? header.getBitRate() : 0;
436 if (!frame_header_valid)
return 0;
438 return (header.channelMode == FrameHeader::ChannelModeID::SINGLE) ? 1 : 2;
443 return frame_header_valid ? header.getFrameLength() : 0;
450 if (bitrate == 0)
return 0;
451 return fileSizeBytes / bitrate;
456 return header.audioVersion == FrameHeader::MPEGVersionID::MPEG_1 ?
"1"
457 : header.audioVersion == FrameHeader::MPEGVersionID::MPEG_2 ?
"2"
458 : header.audioVersion == FrameHeader::MPEGVersionID::MPEG_2_5
465 return header.layer == FrameHeader::LayerID::LAYER_1 ?
"1"
466 : header.layer == FrameHeader::LayerID::LAYER_2 ?
"2"
467 : header.layer == FrameHeader::LayerID::LAYER_3 ?
"3"
473 if (header.layer != FrameHeader::LayerID::LAYER_3)
return 0;
475 return header.audioVersion == FrameHeader::MPEGVersionID::MPEG_1 ? 1152
482 if (sample_rate == 0)
return 0;
489 if (time_per_frame == 0)
return 0;
490 return 1000 / time_per_frame;
494 FrameHeader getFrameHeader() {
495 return frame_header_valid ? header : FrameHeader{};
504 frame_header_valid =
false;
505 header = FrameHeader{};
/// Searches `buf` for a two-byte sync word.
///
/// A position i matches when every bit of `synch` is set in buf[i] and every
/// bit of `syncl` is set in buf[i + 1].
///
/// @param buf    bytes to scan; nullptr yields -1
/// @param nBytes number of readable bytes in buf
/// @param synch  mask for the first byte (default 0xFF)
/// @param syncl  mask for the second byte (default 0xF0)
/// @return index of the first match, or -1 when there is none
int findSyncWord(const uint8_t* buf, size_t nBytes, uint8_t synch = 0xFF,
                 uint8_t syncl = 0xF0) {
  // A sync word needs two bytes. Checking up front also avoids evaluating
  // nBytes - 1 for nBytes == 0, which would wrap around for an unsigned type
  // and let the scan run past the end of the buffer.
  if (buf == nullptr || nBytes < 2) return -1;
  for (size_t i = 0; i + 1 < nBytes; i++) {
    if ((buf[i] & synch) == synch && (buf[i + 1] & syncl) == syncl) {
      return static_cast<int>(i);
    }
  }
  return -1;
}
520 Print* p_output =
nullptr;
522 bool frame_header_valid =
false;
523 size_t buffer_size = 0;
524 size_t last_frame_size = 0;
528 bool progress =
false;
532 FrameHeader::SERIALIZED_SIZE) {
534 uint8_t* temp_data = buffer.
data();
537 int sync_pos = seekFrameSync(temp_data, available);
538 if (sync_pos == -1) {
540 size_t to_remove = (available > 3) ? available - 3 : 0;
559 if (available < FrameHeader::SERIALIZED_SIZE) {
564 FrameHeader temp_header;
565 if (!FrameHeader::decode(temp_data, temp_header) ||
566 validateFrameHeader(temp_header) != FrameReason::VALID) {
575 int frame_len = temp_header.getFrameLength();
576 if (frame_len <= 0 ||
577 frame_len > buffer_size) {
586 if (available < frame_len) {
591 if (available >= frame_len + 2) {
592 if (seekFrameSync(temp_data + frame_len, 2) != 0) {
602 if (p_output !=
nullptr) {
603 size_t written = p_output->write(temp_data, frame_len);
604 if (written != frame_len) {
606 LOGE(
"Failed to write complete frame");
611 last_frame_size = frame_len;
612 header = temp_header;
613 frame_header_valid =
true;
/// Returns true when validateFrameHeader() reports FrameReason::VALID for the
/// stored `header`.
/// NOTE(review): the statements between the signature and the return are not
/// visible in this excerpt; presumably `header` is populated from `data`
/// first - confirm.
625 bool validate(
const uint8_t* data,
size_t len) {
628 return FrameReason::VALID == validateFrameHeader(header);
/// Reports whether the byte sequence `toFind` occurs anywhere within the
/// first `len` bytes of `data`.
///
/// @param data   buffer to search; nullptr yields false
/// @param toFind NUL-terminated pattern (the terminator is not part of the
///               match); nullptr yields false
/// @param len    number of readable bytes in data; 0 yields false
/// @return true when the pattern is found, including a match that ends on
///         the very last byte of the buffer
bool contains(const uint8_t* data, const char* toFind, size_t len) {
  if (data == nullptr || toFind == nullptr || len == 0) return false;
  const size_t find_str_len = strlen(toFind);
  // Compare before subtracting: with unsigned arithmetic len - find_str_len
  // would wrap around for a pattern longer than the buffer and the loop
  // would read far beyond the end of data.
  if (find_str_len > len) return false;
  const size_t last_start = len - find_str_len;
  // Inclusive bound so that a pattern sitting at the end of the buffer is
  // still compared.
  for (size_t j = 0; j <= last_start; j++) {
    if (memcmp(data + j, toFind, find_str_len) == 0) return true;
  }
  return false;
}
/// Finds the first frame sync pattern in `str`: a 0xFF byte followed by a
/// byte whose three top bits are all set.
///
/// @param str bytes to scan; nullptr yields -1
/// @param len number of readable bytes in str
/// @return offset of the first sync pattern, or -1 when none is found
///         (always -1 for fewer than two bytes)
int seekFrameSync(const uint8_t* str, size_t len) {
  // Guard against a null buffer; without it any len >= 2 dereferences str.
  if (str == nullptr) return -1;
  // Signed bound: for len == 0 this is -1 and the loop body never runs.
  const int last = static_cast<int>(len) - 1;
  for (int j = 0; j < last; j++) {
    const bool first_ok = (str[j] == 0xFF);
    if (first_ok && (str[j + 1] & 0xE0) == 0xE0) {
      return j;
    }
  }
  return -1;
}
/// Decodes the bytes at `data` into the stored `header`.
/// Returns early when FrameHeader::decode() reports failure.
/// NOTE(review): the statements that follow a successful decode are not
/// visible in this excerpt.
653 void readFrameHeader(
const uint8_t* data) {
654 if (!FrameHeader::decode(data, header))
return;
/// Result codes returned by validateFrameHeader().
/// NOTE(review): only some enumerators are visible in this excerpt; VALID,
/// INVALID_LAYER and INVALID_EMPHASIS are also referenced by the validation
/// code in this file.
659 enum class FrameReason {
// getBitRate() returned a value <= 0.
661 INVALID_BITRATE_FOR_VERSION,
// getSampleRate() returned the RESERVED value.
662 INVALID_SAMPLERATE_FOR_VERSION,
// audioVersion is MPEGVersionID::INVALID.
663 INVALID_MPEG_VERSION,
// Bit rate rejected for LAYER_2 depending on the channel mode.
665 INVALID_LAYER_II_BITRATE_AND_MODE,
/// Checks a decoded header for invalid field values and returns the first
/// problem found, or FrameReason::VALID when every check passes.
///
/// Checks run in this order: MPEG version, layer, bit rate, sample rate,
/// LAYER_2 bit-rate restrictions, emphasis. Each rejection is logged with
/// LOGI.
/// NOTE(review): closing braces and any `else` keywords are not visible in
/// this excerpt, so the exact nesting of the LAYER_2 checks must be confirmed
/// against the full file.
670 FrameReason validateFrameHeader(
const FrameHeader& header) {
671 if (header.audioVersion == FrameHeader::MPEGVersionID::INVALID) {
672 LOGI(
"invalid mpeg version");
673 return FrameReason::INVALID_MPEG_VERSION;
676 if (header.layer == FrameHeader::LayerID::INVALID) {
677 LOGI(
"invalid layer");
678 return FrameReason::INVALID_LAYER;
// Zero and negative results from getBitRate() are both rejected.
681 if (header.getBitRate() <= 0) {
682 LOGI(
"invalid bitrate");
683 return FrameReason::INVALID_BITRATE_FOR_VERSION;
// A sample rate equal to RESERVED (0) is rejected.
686 if (header.getSampleRate() ==
687 (
unsigned short)FrameHeader::SpecialSampleRate::RESERVED) {
688 LOGI(
"invalid samplerate");
689 return FrameReason::INVALID_SAMPLERATE_FOR_VERSION;
// Extra bit-rate restrictions that only apply to LAYER_2.
694 if (header.layer == FrameHeader::LayerID::LAYER_2) {
// SINGLE channel mode: bit rates of 224000 and above are rejected.
695 if (header.channelMode == FrameHeader::ChannelModeID::SINGLE) {
696 if (header.getBitRate() >= 224000) {
697 LOGI(
"invalid bitrate >224000");
698 return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
// NOTE(review): presumably the next two checks belong to the branch for the
// other channel modes - confirm. Rejects 32000..56000 inclusive.
701 if (header.getBitRate() >= 32000 && header.getBitRate() <= 56000) {
702 LOGI(
"invalid bitrate >32000");
703 return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
// Rejects exactly 80000.
706 if (header.getBitRate() == 80000) {
707 LOGI(
"invalid bitrate >80000");
708 return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
713 if (header.emphasis == FrameHeader::EmphasisID::INVALID) {
714 LOGI(
"invalid Emphasis");
715 return FrameReason::INVALID_EMPHASIS;
// Every check passed.
718 return FrameReason::VALID;