arduino-audio-tools
CodecWavIMA.h
1 #pragma once
2 
3 #include "AudioTools/AudioCodecs/AudioCodecsBase.h"
4 
5 #define WAVE_FORMAT_IMA_ADPCM 0x0011
6 #define TAG(a, b, c, d) ((static_cast<uint32_t>(a) << 24) | (static_cast<uint32_t>(b) << 16) | (static_cast<uint32_t>(c) << 8) | (d))
7 #define READ_BUFFER_SIZE 512
8 
9 namespace audio_tools {
10 
/// Step-index adjustment for each 4-bit IMA ADPCM code (bit 3 is the sign,
/// so the upper half of the table repeats the lower half).
const int16_t ima_index_table[16] = {
    -1, -1, -1, -1,  // codes 0..3
     2,  4,  6,  8,  // codes 4..7
    -1, -1, -1, -1,  // codes 8..11
     2,  4,  6,  8   // codes 12..15
};
15 
16 const int32_t ima_step_table[89] {
17  7, 8, 9, 10, 11, 12, 13, 14, 16, 17,
18  19, 21, 23, 25, 28, 31, 34, 37, 41, 45,
19  50, 55, 60, 66, 73, 80, 88, 97, 107, 118,
20  130, 143, 157, 173, 190, 209, 230, 253, 279, 307,
21  337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
22  876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066,
23  2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358,
24  5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899,
25  15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
26 };
27 
36  WavIMAAudioInfo() = default;
37  WavIMAAudioInfo(const AudioInfo& from) {
38  sample_rate = from.sample_rate;
39  channels = from.channels;
41  }
42 
43  int format = WAVE_FORMAT_IMA_ADPCM;
44  int byte_rate = 0;
45  int block_align = 0;
46  int frames_per_block = 0;
47  int num_samples = 0;
48  bool is_valid = false;
49  uint32_t data_length = 0;
50  uint32_t file_size = 0;
51 };
52 
/// Per-channel decoder state: the last predicted sample and the current
/// position in the step table.
struct IMAState {
  int32_t predictor{0};
  int step_index{0};
};
57 
/// MIME type reported by the decoder.
/// The pointer itself is const (internal linkage) so that including this
/// header from several translation units does not produce duplicate symbols.
static const char* const wav_ima_mime = "audio/x-wav";
59 
/// Outcome of parsing a single RIFF chunk. Negative values are errors.
enum chunk_result {
  IMA_ERR_INVALID_CHUNK = -2,
  IMA_ERR_INVALID_CONTAINER = -1,
  IMA_CHUNK_OK = 0,
  IMA_CHUNK_UNKNOWN = 1
};
74 
75 class WavIMAHeader {
76  public:
77  WavIMAHeader() {
78  clearHeader();
79  };
80 
81  void clearHeader() {
82  data_pos = 0;
83  memset((void*)&headerInfo, 0, sizeof(WavIMAAudioInfo));
84  headerInfo.is_valid = false;
85  header_complete = false;
86  chunk_len = 0;
87  max_chunk_len = 8;
88  skip_len = 0;
89  isFirstChunk = true;
90  }
91 
92  chunk_result parseChunk() {
93  data_pos = 0;
94  bool chunkUnknown = false;
95  uint32_t tag = read_tag();
96  uint32_t length = read_int32();
97  if (length < 4) {
98  return IMA_ERR_INVALID_CHUNK;
99  }
100  if (tag == TAG('R', 'I', 'F', 'F')) {
101  uint32_t container_type = read_tag();
102  if (container_type != TAG('W', 'A', 'V', 'E')) {
103  return IMA_ERR_INVALID_CONTAINER;
104  }
105  }
106  else if (tag == TAG('f', 'm', 't', ' ')) {
107  if (length < 20) {
108  // Insufficient data for 'fmt '
109  return IMA_ERR_INVALID_CHUNK;
110  }
111  headerInfo.format = read_int16();
112  headerInfo.channels = read_int16();
113  headerInfo.sample_rate = read_int32();
114  headerInfo.byte_rate = read_int32();
115  headerInfo.block_align = read_int16();
116  headerInfo.bits_per_sample = read_int16();
117 
118  // Skip the size parameter for extra information as for IMA ADPCM the following data should always be 2 bytes.
119  skip(2);
120  headerInfo.frames_per_block = read_int16();
121  if (headerInfo.format != WAVE_FORMAT_IMA_ADPCM || headerInfo.channels > 2) {
122  // Insufficient or invalid data for waveformatex
123  LOGE("Format not supported: %d, %d\n", headerInfo.format, headerInfo.channels);
124  return IMA_ERR_INVALID_CHUNK;
125  } else {
126  headerInfo.is_valid = true; // At this point we know that the format information is valid
127  }
128  } else if (tag == TAG('f', 'a', 'c', 't')) {
129  /* In the context of ADPCM the fact chunk should contain the total number of mono or stereo samples
130  however we shouldn't rely on this as some programs (e.g. Audacity) write an incorrect value in some cases. This value is currently not used by the decoder.
131  */
132  headerInfo.num_samples = read_int32();
133  } else if (tag == TAG('d', 'a', 't', 'a')) {
134  // Size of the data chunk.
135  headerInfo.data_length = length;
136  } else {
137  chunkUnknown = true;
138  }
139  // Skip any remaining data that exceeds the buffer
140  if (tag != TAG('R', 'I', 'F', 'F') && length > 20) skip_len = length - 20;
141  return chunkUnknown ? IMA_CHUNK_UNKNOWN : IMA_CHUNK_OK;
142  }
143 
144  /* Adds data to the header data buffer
145  Because the header isn't necessarily uniform, we go through each chunk individually
146  and only copy the ones we need. This could probably still be optimized. */
147  int write(uint8_t* data, size_t data_len) {
148  int write_len;
149  int data_offset = 0;
150  while (data_len > 0 && !header_complete) {
151  if (skip_len > 0) {
152  /* Used to skip any unknown chunks or chunks that are longer than expected.
153  Some encoders like ffmpeg write meta information before the "data" chunk by default. */
154  write_len = min(skip_len, data_len);
155  skip_len -= write_len;
156  data_offset += write_len;
157  data_len -= write_len;
158  }
159  else {
160  // Search / Wait for the individual chunks and write them to the temporary buffer.
161  write_len = min(data_len, max_chunk_len - chunk_len);
162  memmove(chunk_buffer + chunk_len, data + data_offset, write_len);
163  chunk_len += write_len;
164  data_offset += write_len;
165  data_len -= write_len;
166 
167  if (chunk_len == max_chunk_len) {
168  data_pos = 0;
169  if (max_chunk_len == 8) {
170  uint32_t chunk_tag = read_tag();
171  uint32_t chunk_size = read_int32();
172  if (isFirstChunk && chunk_tag != TAG('R', 'I', 'F', 'F')) {
173  headerInfo.is_valid = false;
174  return IMA_ERR_INVALID_CONTAINER;
175  }
176  isFirstChunk = false;
177  if (chunk_tag == TAG('R', 'I', 'F', 'F')) chunk_size = 4;
178  else if (chunk_tag == TAG('d', 'a', 't', 'a')) {
179  parseChunk();
180  header_complete = true;
181  logInfo();
182  break;
183  }
184 
185  /* Wait for the rest of the data before processing the chunk.
186  The largest chunk we expect is the "fmt " chunk which is 20 bytes long in this case. */
187  write_len = min((size_t)chunk_size, (size_t)20);
188  max_chunk_len += write_len;
189  continue;
190  }
191  else {
192  chunk_result result = parseChunk();
193  switch (result) {
194  // Abort processing the header if the RIFF container or a required chunk is not valid
195  case IMA_ERR_INVALID_CONTAINER:
196  case IMA_ERR_INVALID_CHUNK:
197  headerInfo.is_valid = false;
198  return result;
199  break;
200  }
201  chunk_len = 0;
202  max_chunk_len = 8;
203  }
204  }
205  }
206  }
207  return data_offset;
208  }
209 
211  bool isDataComplete() {
212  return header_complete;
213  }
214 
215  // provides the AudioInfo
216  WavIMAAudioInfo &audioInfo() {
217  return headerInfo;
218  }
219 
220  protected:
221  struct WavIMAAudioInfo headerInfo;
222  uint8_t chunk_buffer[28];
223  size_t chunk_len = 0;
224  size_t max_chunk_len = 8;
225  size_t skip_len = 0;
226  size_t data_pos = 0;
227  bool header_complete = false;
228  bool isFirstChunk = true;
229 
230  uint32_t read_tag() {
231  uint32_t tag = getChar();
232  tag = (tag << 8) | getChar();
233  tag = (tag << 8) | getChar();
234  tag = (tag << 8) | getChar();
235  return tag;
236  }
237 
238  uint32_t read_int32() {
239  uint32_t value = (uint32_t)getChar();
240  value |= (uint32_t)getChar() << 8;
241  value |= (uint32_t)getChar() << 16;
242  value |= (uint32_t)getChar() << 24;
243  return value;
244  }
245 
246  uint16_t read_int16() {
247  uint16_t value = getChar();
248  value |= getChar() << 8;
249  return value;
250  }
251 
252  void skip(int n) {
253  n = min((size_t)n, chunk_len - data_pos);
254  for (int i=0; i<n; i++) if (data_pos < chunk_len) data_pos++;
255  return;
256  }
257 
258  int getChar() {
259  if (data_pos < chunk_len) return chunk_buffer[data_pos++];
260  else return -1;
261  }
262 
263  void logInfo() {
264  LOGI("WavIMAHeader format: %d", headerInfo.format);
265  LOGI("WavIMAHeader channels: %d", headerInfo.channels);
266  LOGI("WavIMAHeader sample_rate: %d", headerInfo.sample_rate);
267  LOGI("WavIMAHeader block align: %d", headerInfo.block_align);
268  LOGI("WavIMAHeader bits_per_sample: %d", headerInfo.bits_per_sample);
269  }
270 };
271 
272 
286 class WavIMADecoder : public AudioDecoder {
287  public:
293  TRACED();
294  }
295 
301  WavIMADecoder(Print &out_stream, bool active=true) {
302  TRACED();
303  this->out = &out_stream;
304  this->active = active;
305  }
306 
315  TRACED();
316  this->out = &out_stream;
318  }
319 
320  ~WavIMADecoder() {
321  if (input_buffer != nullptr) delete[] input_buffer;
322  if (output_buffer != nullptr) delete[] output_buffer;
323  }
324 
326  void setOutput(Print &out_stream) {
327  this->out = &out_stream;
328  }
329 
330  bool begin() {
331  TRACED();
332  ima_states[0].predictor = 0;
333  ima_states[0].step_index = 0;
334  ima_states[1].predictor = 0;
335  ima_states[1].step_index = 0;
336  isFirst = true;
337  active = true;
338  header.clearHeader();
339  return true;
340  }
341 
342  void end() {
343  TRACED();
344  active = false;
345  }
346 
347  const char* mime() {
348  return wav_ima_mime;
349  }
350 
351  WavIMAAudioInfo &audioInfoEx() {
352  return header.audioInfo();
353  }
354 
355  AudioInfo audioInfo() override {
356  return header.audioInfo();
357  }
358 
359  virtual size_t write(const uint8_t *data, size_t len) {
360  TRACED();
361  if (active) {
362  if (isFirst) {
363  // we expect at least the full header
364  int written = header.write((uint8_t*)data, len);
365  if (written == IMA_ERR_INVALID_CONTAINER || written == IMA_ERR_INVALID_CHUNK) {
366  isValid = false;
367  isFirst = false;
368  LOGE("File is not valid");
369  return len;
370  }
371 
372  if (!header.isDataComplete()) {
373  return len;
374  }
375 
376  size_t len_open = len - written;
377  uint8_t *sound_ptr = (uint8_t *) data + written;
378  isFirst = false;
379  isValid = header.audioInfo().is_valid;
380 
381  LOGI("WAV sample_rate: %d", header.audioInfo().sample_rate);
382  LOGI("WAV data_length: %u", (unsigned) header.audioInfo().data_length);
383  LOGI("WAV is_valid: %s", header.audioInfo().is_valid ? "true" : "false");
384 
385  isValid = header.audioInfo().is_valid;
386  if (isValid) {
387  if (input_buffer != nullptr) delete[] input_buffer;
388  if (output_buffer != nullptr) delete[] output_buffer;
389  bytes_per_encoded_block = header.audioInfo().block_align;
390  bytes_per_decoded_block = header.audioInfo().frames_per_block * header.audioInfo().channels * 2;
391  samples_per_decoded_block = bytes_per_decoded_block >> 1;
392  input_buffer = new uint8_t[bytes_per_encoded_block];
393  output_buffer = new int16_t[samples_per_decoded_block];
394  // update sampling rate if the target supports it
395  AudioInfo bi;
396  bi.sample_rate = header.audioInfo().sample_rate;
397  bi.channels = header.audioInfo().channels;
398  bi.bits_per_sample = 16;
399  remaining_bytes = header.audioInfo().data_length;
400  notifyAudioChange(bi);
401  // write prm data from first record
402  LOGI("WavIMADecoder writing first sound data");
403  processInput(sound_ptr, len_open);
404  }
405  } else if (isValid) {
406  processInput((uint8_t*)data, len);
407  }
408  }
409  return len;
410  }
411 
413  int readStream(Stream &in) {
414  TRACED();
415  uint8_t buffer[READ_BUFFER_SIZE];
416  int len = in.readBytes(buffer, READ_BUFFER_SIZE);
417  return write(buffer, len);
418  }
419 
420  virtual operator bool() {
421  return active;
422  }
423 
424  protected:
425  WavIMAHeader header;
426  Print *out;
427  bool isFirst = true;
428  bool isValid = true;
429  bool active;
430  uint8_t *input_buffer = nullptr;
431  int32_t input_pos = 0;
432  size_t remaining_bytes = 0;
433  size_t bytes_per_encoded_block = 0;
434  int16_t *output_buffer = nullptr;
435  size_t bytes_per_decoded_block = 0;
436  size_t samples_per_decoded_block = 0;
437  IMAState ima_states[2];
438 
439  int16_t decodeSample(uint8_t sample, int channel = 0) {
440  int step_index = ima_states[channel].step_index;
441  int32_t step = ima_step_table[step_index];
442  step_index += ima_index_table[sample];
443  if (step_index < 0) step_index = 0;
444  else if (step_index > 88) step_index = 88;
445  ima_states[channel].step_index = step_index;
446  int32_t predictor = ima_states[channel].predictor;
447  uint8_t sign = sample & 8;
448  uint8_t delta = sample & 7;
449  int32_t diff = step >> 3;
450  if (delta & 4) diff += step;
451  if (delta & 2) diff += (step >> 1);
452  if (delta & 1) diff += (step >> 2);
453  if (sign) predictor -= diff;
454  else predictor += diff;
455  if (predictor < -32768) predictor = -32768;
456  else if (predictor > 32767) predictor = 32767;
457  ima_states[channel].predictor = predictor;
458  return (int16_t)predictor;
459  }
460 
461  void decodeBlock(int channels) {
462  if (channels == 0 || channels > 2) return;
463  input_pos = 4;
464  int output_pos = 1;
465  ima_states[0].predictor = (int16_t)((input_buffer[1] << 8) + input_buffer[0]);
466  ima_states[0].step_index = input_buffer[2];
467  output_buffer[0] = ima_states[0].predictor;
468  if (channels == 2) {
469  ima_states[1].predictor = (int16_t)(input_buffer[5] << 8) + input_buffer[4];
470  ima_states[1].step_index = input_buffer[6];
471  output_buffer[1] = ima_states[1].predictor;
472  input_pos = 8;
473  output_pos = 2;
474  }
475  for (int i=0; i<samples_per_decoded_block-channels; i++) {
476  uint8_t sample = (i & 1) ? input_buffer[input_pos++] >> 4 : input_buffer[input_pos] & 15;
477  if (channels == 1) output_buffer[output_pos++] = decodeSample(sample);
478  else {
479  output_buffer[output_pos] = decodeSample(sample, (i >> 3) & 1);
480  output_pos += 2;
481  if ((i & 15) == 7) output_pos -= 15;
482  else if ((i & 15) == 15) output_pos--;
483  }
484  }
485  }
486 
487  void processInput(const uint8_t* data, size_t size) {
488  int max_size = min(size, remaining_bytes);
489  for (int i=0; i<max_size; i++) {
490  input_buffer[input_pos++] = data[i];
491  if (input_pos == bytes_per_encoded_block) {
492  decodeBlock(header.audioInfo().channels);
493  input_pos = 0;
494  out->write((uint8_t*)output_buffer, bytes_per_decoded_block);
495  }
496  }
497  remaining_bytes -= max_size;
498  if (remaining_bytes == 0) active = false;
499  }
500 };
501 
502 }
Decoding of encoded audio into PCM data.
Definition: AudioCodecsBase.h:16
virtual void addNotifyAudioChange(AudioInfoSupport &bi)
Adds target to be notified about audio changes.
Definition: AudioTypes.h:162
Supports changes to the sampling rate, bits and channels.
Definition: AudioTypes.h:139
Definition: NoArduino.h:58
Definition: NoArduino.h:125
Obsolete: WavIMADecoder - based on WAVDecoder - We parse the header data as we receive it and send th...
Definition: CodecWavIMA.h:286
WavIMADecoder()
Construct a new WavIMADecoder object.
Definition: CodecWavIMA.h:292
WavIMADecoder(Print &out_stream, AudioInfoSupport &bi)
Construct a new WavIMADecoder object.
Definition: CodecWavIMA.h:314
void setOutput(Print &out_stream)
Defines the output Stream.
Definition: CodecWavIMA.h:326
int readStream(Stream &in)
Alternative API which provides the data from an input stream.
Definition: CodecWavIMA.h:413
AudioInfo audioInfo() override
provides the actual input AudioInfo
Definition: CodecWavIMA.h:355
WavIMADecoder(Print &out_stream, bool active=true)
Construct a new WavIMADecoder object.
Definition: CodecWavIMA.h:301
Definition: CodecWavIMA.h:75
bool isDataComplete()
Returns true if the header is complete (data chunk has been found)
Definition: CodecWavIMA.h:211
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition: AudioConfig.h:868
chunk_result
Parser for Wav header data adjusted for IMA ADPCM format - partially based on CodecWAV....
Definition: CodecWavIMA.h:68
Basic Audio information which drives e.g. I2S.
Definition: AudioTypes.h:52
sample_rate_t sample_rate
Sample Rate: e.g 44100.
Definition: AudioTypes.h:55
uint16_t channels
Number of channels: 2=stereo, 1=mono.
Definition: AudioTypes.h:57
uint8_t bits_per_sample
Number of bits per sample (int16_t = 16 bits)
Definition: AudioTypes.h:59
Definition: CodecWavIMA.h:53
Sound information which is available in the WAV header - adjusted for IMA ADPCM.
Definition: CodecWavIMA.h:35