arduino-audio-tools
All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends Modules Pages
CodecWavIMA.h
1#pragma once
2
3#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
4
5#define WAVE_FORMAT_IMA_ADPCM 0x0011
6#define TAG(a, b, c, d) ((static_cast<uint32_t>(a) << 24) | (static_cast<uint32_t>(b) << 16) | (static_cast<uint32_t>(c) << 8) | (d))
7#define READ_BUFFER_SIZE 512
8
9namespace audio_tools {
10
/// Step-index adjustment, looked up with the 4-bit ADPCM code. Bit 3 of the
/// code is the sign bit and has no influence on the adjustment, which is why
/// the second half of the table repeats the first.
const int16_t ima_index_table[16] = {
    -1, -1, -1, -1, 2, 4, 6, 8,  // codes 0..7
    -1, -1, -1, -1, 2, 4, 6, 8   // codes 8..15
};
15
/// Quantizer step sizes, selected by the step index (valid range 0..88).
const int32_t ima_step_table[89] = {
    7,     8,     9,     10,    11,    12,    13,    14,    16,    17,     //  0.. 9
    19,    21,    23,    25,    28,    31,    34,    37,    41,    45,     // 10..19
    50,    55,    60,    66,    73,    80,    88,    97,    107,   118,    // 20..29
    130,   143,   157,   173,   190,   209,   230,   253,   279,   307,    // 30..39
    337,   371,   408,   449,   494,   544,   598,   658,   724,   796,    // 40..49
    876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066,   // 50..59
    2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,   // 60..69
    5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899,  // 70..79
    15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767          // 80..88
};
27
36 WavIMAAudioInfo() = default;
37 WavIMAAudioInfo(const AudioInfo& from) {
39 channels = from.channels;
41 }
42
43 int format = WAVE_FORMAT_IMA_ADPCM;
44 int byte_rate = 0;
45 int block_align = 0;
46 int frames_per_block = 0;
47 int num_samples = 0;
48 bool is_valid = false;
49 uint32_t data_length = 0;
50 uint32_t file_size = 0;
51};
52
/// Decoder state that is tracked separately for every channel.
struct IMAState {
  int32_t predictor{0};  ///< last decoded sample value
  int step_index{0};     ///< current position in ima_step_table
};
57
/// MIME type reported by the decoder. Declared static because this header
/// defines the variable: a non-const pointer with external linkage would be
/// emitted by every translation unit that includes the file and clash at link
/// time.
static const char* wav_ima_mime = "audio/x-wav";
59
68typedef enum {
69 IMA_ERR_INVALID_CHUNK = -2,
70 IMA_ERR_INVALID_CONTAINER,
71 IMA_CHUNK_OK,
72 IMA_CHUNK_UNKNOWN
74
76 public:
77 WavIMAHeader() {
78 clearHeader();
79 };
80
81 void clearHeader() {
82 data_pos = 0;
83 memset((void*)&headerInfo, 0, sizeof(WavIMAAudioInfo));
84 headerInfo.is_valid = false;
85 header_complete = false;
86 chunk_len = 0;
87 max_chunk_len = 8;
88 skip_len = 0;
89 isFirstChunk = true;
90 }
91
92 chunk_result parseChunk() {
93 data_pos = 0;
94 bool chunkUnknown = false;
95 uint32_t tag = read_tag();
96 uint32_t length = read_int32();
97 if (length < 4) {
98 return IMA_ERR_INVALID_CHUNK;
99 }
100 if (tag == TAG('R', 'I', 'F', 'F')) {
101 uint32_t container_type = read_tag();
102 if (container_type != TAG('W', 'A', 'V', 'E')) {
103 return IMA_ERR_INVALID_CONTAINER;
104 }
105 }
106 else if (tag == TAG('f', 'm', 't', ' ')) {
107 if (length < 20) {
108 // Insufficient data for 'fmt '
109 return IMA_ERR_INVALID_CHUNK;
110 }
111 headerInfo.format = read_int16();
112 headerInfo.channels = read_int16();
113 headerInfo.sample_rate = read_int32();
114 headerInfo.byte_rate = read_int32();
115 headerInfo.block_align = read_int16();
116 headerInfo.bits_per_sample = read_int16();
117
118 // Skip the size parameter for extra information as for IMA ADPCM the following data should always be 2 bytes.
119 skip(2);
120 headerInfo.frames_per_block = read_int16();
121 if (headerInfo.format != WAVE_FORMAT_IMA_ADPCM || headerInfo.channels > 2) {
122 // Insufficient or invalid data for waveformatex
123 LOGE("Format not supported: %d, %d\n", headerInfo.format, headerInfo.channels);
124 return IMA_ERR_INVALID_CHUNK;
125 } else {
126 headerInfo.is_valid = true; // At this point we know that the format information is valid
127 }
128 } else if (tag == TAG('f', 'a', 'c', 't')) {
129 /* In the context of ADPCM the fact chunk should contain the total number of mono or stereo samples
130 however we shouldn't rely on this as some programs (e.g. Audacity) write an incorrect value in some cases. This value is currently not used by the decoder.
131 */
132 headerInfo.num_samples = read_int32();
133 } else if (tag == TAG('d', 'a', 't', 'a')) {
134 // Size of the data chunk.
135 headerInfo.data_length = length;
136 } else {
137 chunkUnknown = true;
138 }
139 // Skip any remaining data that exceeds the buffer
140 if (tag != TAG('R', 'I', 'F', 'F') && length > 20) skip_len = length - 20;
141 return chunkUnknown ? IMA_CHUNK_UNKNOWN : IMA_CHUNK_OK;
142 }
143
144 /* Adds data to the header data buffer
145 Because the header isn't necessarily uniform, we go through each chunk individually
146 and only copy the ones we need. This could probably still be optimized. */
147 int write(uint8_t* data, size_t data_len) {
148 int write_len;
149 int data_offset = 0;
150 while (data_len > 0 && !header_complete) {
151 if (skip_len > 0) {
152 /* Used to skip any unknown chunks or chunks that are longer than expected.
153 Some encoders like ffmpeg write meta information before the "data" chunk by default. */
154 write_len = min(skip_len, data_len);
155 skip_len -= write_len;
156 data_offset += write_len;
157 data_len -= write_len;
158 }
159 else {
160 // Search / Wait for the individual chunks and write them to the temporary buffer.
161 write_len = min(data_len, max_chunk_len - chunk_len);
162 memmove(chunk_buffer + chunk_len, data + data_offset, write_len);
163 chunk_len += write_len;
164 data_offset += write_len;
165 data_len -= write_len;
166
167 if (chunk_len == max_chunk_len) {
168 data_pos = 0;
169 if (max_chunk_len == 8) {
170 uint32_t chunk_tag = read_tag();
171 uint32_t chunk_size = read_int32();
172 if (isFirstChunk && chunk_tag != TAG('R', 'I', 'F', 'F')) {
173 headerInfo.is_valid = false;
174 return IMA_ERR_INVALID_CONTAINER;
175 }
176 isFirstChunk = false;
177 if (chunk_tag == TAG('R', 'I', 'F', 'F')) chunk_size = 4;
178 else if (chunk_tag == TAG('d', 'a', 't', 'a')) {
179 parseChunk();
180 header_complete = true;
181 logInfo();
182 break;
183 }
184
185 /* Wait for the rest of the data before processing the chunk.
186 The largest chunk we expect is the "fmt " chunk which is 20 bytes long in this case. */
187 write_len = min((size_t)chunk_size, (size_t)20);
188 max_chunk_len += write_len;
189 continue;
190 }
191 else {
192 chunk_result result = parseChunk();
193 switch (result) {
194 // Abort processing the header if the RIFF container or a required chunk is not valid
195 case IMA_ERR_INVALID_CONTAINER:
196 case IMA_ERR_INVALID_CHUNK:
197 headerInfo.is_valid = false;
198 return result;
199 break;
200 }
201 chunk_len = 0;
202 max_chunk_len = 8;
203 }
204 }
205 }
206 }
207 return data_offset;
208 }
209
212 return header_complete;
213 }
214
215 // provides the AudioInfo
216 WavIMAAudioInfo &audioInfo() {
217 return headerInfo;
218 }
219
220 protected:
221 struct WavIMAAudioInfo headerInfo;
222 uint8_t chunk_buffer[28];
223 size_t chunk_len = 0;
224 size_t max_chunk_len = 8;
225 size_t skip_len = 0;
226 size_t data_pos = 0;
227 bool header_complete = false;
228 bool isFirstChunk = true;
229
230 uint32_t read_tag() {
231 uint32_t tag = getChar();
232 tag = (tag << 8) | getChar();
233 tag = (tag << 8) | getChar();
234 tag = (tag << 8) | getChar();
235 return tag;
236 }
237
238 uint32_t read_int32() {
239 uint32_t value = (uint32_t)getChar();
240 value |= (uint32_t)getChar() << 8;
241 value |= (uint32_t)getChar() << 16;
242 value |= (uint32_t)getChar() << 24;
243 return value;
244 }
245
246 uint16_t read_int16() {
247 uint16_t value = getChar();
248 value |= getChar() << 8;
249 return value;
250 }
251
252 void skip(int n) {
253 n = min((size_t)n, chunk_len - data_pos);
254 for (int i=0; i<n; i++) if (data_pos < chunk_len) data_pos++;
255 return;
256 }
257
258 int getChar() {
259 if (data_pos < chunk_len) return chunk_buffer[data_pos++];
260 else return -1;
261 }
262
263 void logInfo() {
264 LOGI("WavIMAHeader format: %d", headerInfo.format);
265 LOGI("WavIMAHeader channels: %d", headerInfo.channels);
266 LOGI("WavIMAHeader sample_rate: %d", headerInfo.sample_rate);
267 LOGI("WavIMAHeader block align: %d", headerInfo.block_align);
268 LOGI("WavIMAHeader bits_per_sample: %d", headerInfo.bits_per_sample);
269 }
270};
271
272
287 public:
293 TRACED();
294 }
295
301 WavIMADecoder(Print &out_stream, bool active=true) {
302 TRACED();
303 this->out = &out_stream;
304 this->active = active;
305 }
306
315 TRACED();
316 this->out = &out_stream;
318 }
319
321 if (input_buffer != nullptr) delete[] input_buffer;
322 if (output_buffer != nullptr) delete[] output_buffer;
323 }
324
326 void setOutput(Print &out_stream) {
327 this->out = &out_stream;
328 }
329
330 bool begin() {
331 TRACED();
332 ima_states[0].predictor = 0;
333 ima_states[0].step_index = 0;
334 ima_states[1].predictor = 0;
335 ima_states[1].step_index = 0;
336 isFirst = true;
337 active = true;
338 header.clearHeader();
339 return true;
340 }
341
342 void end() {
343 TRACED();
344 active = false;
345 }
346
347 const char* mime() {
348 return wav_ima_mime;
349 }
350
351 WavIMAAudioInfo &audioInfoEx() {
352 return header.audioInfo();
353 }
354
355 AudioInfo audioInfo() override {
356 return header.audioInfo();
357 }
358
359 virtual size_t write(const uint8_t *data, size_t len) {
360 TRACED();
361 if (active) {
362 if (isFirst) {
363 // we expect at least the full header
364 int written = header.write((uint8_t*)data, len);
365 if (written == IMA_ERR_INVALID_CONTAINER || written == IMA_ERR_INVALID_CHUNK) {
366 isValid = false;
367 isFirst = false;
368 LOGE("File is not valid");
369 return len;
370 }
371
372 if (!header.isDataComplete()) {
373 return len;
374 }
375
376 size_t len_open = len - written;
377 uint8_t *sound_ptr = (uint8_t *) data + written;
378 isFirst = false;
379 isValid = header.audioInfo().is_valid;
380
381 LOGI("WAV sample_rate: %d", header.audioInfo().sample_rate);
382 LOGI("WAV data_length: %u", (unsigned) header.audioInfo().data_length);
383 LOGI("WAV is_valid: %s", header.audioInfo().is_valid ? "true" : "false");
384
385 isValid = header.audioInfo().is_valid;
386 if (isValid) {
387 if (input_buffer != nullptr) delete[] input_buffer;
388 if (output_buffer != nullptr) delete[] output_buffer;
389 bytes_per_encoded_block = header.audioInfo().block_align;
390 bytes_per_decoded_block = header.audioInfo().frames_per_block * header.audioInfo().channels * 2;
391 samples_per_decoded_block = bytes_per_decoded_block >> 1;
392 input_buffer = new uint8_t[bytes_per_encoded_block];
393 output_buffer = new int16_t[samples_per_decoded_block];
394 // update sampling rate if the target supports it
395 AudioInfo bi;
396 bi.sample_rate = header.audioInfo().sample_rate;
397 bi.channels = header.audioInfo().channels;
398 bi.bits_per_sample = 16;
399 remaining_bytes = header.audioInfo().data_length;
400 notifyAudioChange(bi);
401 // write prm data from first record
402 LOGI("WavIMADecoder writing first sound data");
403 processInput(sound_ptr, len_open);
404 }
405 } else if (isValid) {
406 processInput((uint8_t*)data, len);
407 }
408 }
409 return len;
410 }
411
414 TRACED();
415 uint8_t buffer[READ_BUFFER_SIZE];
416 int len = in.readBytes(buffer, READ_BUFFER_SIZE);
417 return write(buffer, len);
418 }
419
420 virtual operator bool() {
421 return active;
422 }
423
424 protected:
425 WavIMAHeader header;
426 Print *out;
427 bool isFirst = true;
428 bool isValid = true;
429 bool active;
430 uint8_t *input_buffer = nullptr;
431 int32_t input_pos = 0;
432 size_t remaining_bytes = 0;
433 size_t bytes_per_encoded_block = 0;
434 int16_t *output_buffer = nullptr;
435 size_t bytes_per_decoded_block = 0;
436 size_t samples_per_decoded_block = 0;
437 IMAState ima_states[2];
438
439 int16_t decodeSample(uint8_t sample, int channel = 0) {
440 int step_index = ima_states[channel].step_index;
441 int32_t step = ima_step_table[step_index];
442 step_index += ima_index_table[sample];
443 if (step_index < 0) step_index = 0;
444 else if (step_index > 88) step_index = 88;
445 ima_states[channel].step_index = step_index;
446 int32_t predictor = ima_states[channel].predictor;
447 uint8_t sign = sample & 8;
448 uint8_t delta = sample & 7;
449 int32_t diff = step >> 3;
450 if (delta & 4) diff += step;
451 if (delta & 2) diff += (step >> 1);
452 if (delta & 1) diff += (step >> 2);
453 if (sign) predictor -= diff;
454 else predictor += diff;
455 if (predictor < -32768) predictor = -32768;
456 else if (predictor > 32767) predictor = 32767;
457 ima_states[channel].predictor = predictor;
458 return (int16_t)predictor;
459 }
460
461 void decodeBlock(int channels) {
462 if (channels == 0 || channels > 2) return;
463 input_pos = 4;
464 int output_pos = 1;
465 ima_states[0].predictor = (int16_t)((input_buffer[1] << 8) + input_buffer[0]);
466 ima_states[0].step_index = input_buffer[2];
467 output_buffer[0] = ima_states[0].predictor;
468 if (channels == 2) {
469 ima_states[1].predictor = (int16_t)(input_buffer[5] << 8) + input_buffer[4];
470 ima_states[1].step_index = input_buffer[6];
471 output_buffer[1] = ima_states[1].predictor;
472 input_pos = 8;
473 output_pos = 2;
474 }
475 for (int i=0; i<samples_per_decoded_block-channels; i++) {
476 uint8_t sample = (i & 1) ? input_buffer[input_pos++] >> 4 : input_buffer[input_pos] & 15;
477 if (channels == 1) output_buffer[output_pos++] = decodeSample(sample);
478 else {
479 output_buffer[output_pos] = decodeSample(sample, (i >> 3) & 1);
480 output_pos += 2;
481 if ((i & 15) == 7) output_pos -= 15;
482 else if ((i & 15) == 15) output_pos--;
483 }
484 }
485 }
486
487 void processInput(const uint8_t* data, size_t size) {
488 int max_size = min(size, remaining_bytes);
489 for (int i=0; i<max_size; i++) {
490 input_buffer[input_pos++] = data[i];
491 if (input_pos == bytes_per_encoded_block) {
492 decodeBlock(header.audioInfo().channels);
493 input_pos = 0;
494 out->write((uint8_t*)output_buffer, bytes_per_decoded_block);
495 }
496 }
497 remaining_bytes -= max_size;
498 if (remaining_bytes == 0) active = false;
499 }
500};
501
502}
Decoding of encoded audio into PCM data.
Definition AudioCodecsBase.h:18
virtual void addNotifyAudioChange(AudioInfoSupport &bi)
Adds target to be notified about audio changes.
Definition AudioTypes.h:151
Supports changes to the sampling rate, bits and channels.
Definition AudioTypes.h:133
Definition NoArduino.h:62
Definition NoArduino.h:142
Obsolete: WavIMADecoder - based on WAVDecoder - We parse the header data as we receive it and send th...
Definition CodecWavIMA.h:286
WavIMADecoder()
Construct a new WavIMADecoder object.
Definition CodecWavIMA.h:292
WavIMADecoder(Print &out_stream, AudioInfoSupport &bi)
Construct a new WavIMADecoder object.
Definition CodecWavIMA.h:314
void setOutput(Print &out_stream)
Defines the output Stream.
Definition CodecWavIMA.h:326
int readStream(Stream &in)
Alternative API which provides the data from an input stream.
Definition CodecWavIMA.h:413
AudioInfo audioInfo() override
provides the actual input AudioInfo
Definition CodecWavIMA.h:355
WavIMADecoder(Print &out_stream, bool active=true)
Construct a new WavIMADecoder object.
Definition CodecWavIMA.h:301
Definition CodecWavIMA.h:75
bool isDataComplete()
Returns true if the header is complete (data chunk has been found)
Definition CodecWavIMA.h:211
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition AudioCodecsBase.h:10
chunk_result
Parser for Wav header data adjusted for IMA ADPCM format - partially based on CodecWAV....
Definition CodecWavIMA.h:68
Basic Audio information which drives e.g. I2S.
Definition AudioTypes.h:53
sample_rate_t sample_rate
Sample Rate: e.g 44100.
Definition AudioTypes.h:55
uint16_t channels
Number of channels: 2=stereo, 1=mono.
Definition AudioTypes.h:57
uint8_t bits_per_sample
Number of bits per sample (int16_t = 16 bits)
Definition AudioTypes.h:59
Definition CodecWavIMA.h:53
Sound information which is available in the WAV header - adjusted for IMA ADPCM.
Definition CodecWavIMA.h:35