arduino-audio-tools
CodecWAV.h
1 #pragma once
2 
3 #include "AudioCodecs/AudioEncoded.h"
5 
6 
// Packs four 8-bit characters into one 32-bit value; the first argument ends
// up in the most significant byte. Used to compare chunk ids like 'RIFF'.
7 #define TAG(a, b, c, d) \
8  ((static_cast<uint32_t>(a) << 24) | (static_cast<uint32_t>(b) << 16) | \
9  (static_cast<uint32_t>(c) << 8) | (d))
// NOTE(review): READ_BUFFER_SIZE is not referenced in the visible part of this file - confirm it is still needed.
10 #define READ_BUFFER_SIZE 512
11 
12 namespace audio_tools {
13 
21  WAVAudioInfo() = default;
22  WAVAudioInfo(const AudioInfo &from) {
23  sample_rate = from.sample_rate;
24  channels = from.channels;
26  }
27 
28  AudioFormat format = AudioFormat::PCM;
29  int byte_rate = 0;
30  int block_align = 0;
31  bool is_streamed = true;
32  bool is_valid = false;
33  uint32_t data_length = 0;
34  uint32_t file_size = 0;
35  int offset = 0;
36 };
37 
// MIME type string returned by the mime() methods of WAVDecoder and WAVEncoder.
38 static const char *wav_mime = "audio/wav";
39 
47 class WAVHeader {
48  public:
49  WAVHeader() = default;
50 
52  int write(uint8_t *data, size_t data_len) {
53  int write_len = min(data_len, 44 - len);
54  memmove(buffer, data + len, write_len);
55  len += write_len;
56  LOGI("WAVHeader::write: %u -> %d -> %d", (unsigned)data_len, write_len,
57  (int)len);
58  return write_len;
59  }
60 
62  void parse() {
63  LOGI("WAVHeader::begin: %u", (unsigned)len);
64  this->data_pos = 0l;
65  memset((void *)&headerInfo, 0, sizeof(WAVAudioInfo));
66  while (!eof()) {
67  uint32_t tag, tag2, length;
68  tag = read_tag();
69  if (eof()) break;
70  length = read_int32();
71  if (!length || length >= 0x7fff0000) {
72  headerInfo.is_streamed = true;
73  length = ~0;
74  }
75  if (tag != TAG('R', 'I', 'F', 'F') || length < 4) {
76  seek(length, SEEK_CUR);
77  continue;
78  }
79  tag2 = read_tag();
80  length -= 4;
81  if (tag2 != TAG('W', 'A', 'V', 'E')) {
82  seek(length, SEEK_CUR);
83  continue;
84  }
85  // RIFF chunk found, iterate through it
86  while (length >= 8) {
87  uint32_t subtag, sublength;
88  subtag = read_tag();
89  if (eof()) break;
90  sublength = read_int32();
91  length -= 8;
92  if (length < sublength) break;
93  if (subtag == TAG('f', 'm', 't', ' ')) {
94  if (sublength < 16) {
95  // Insufficient data for 'fmt '
96  break;
97  }
98  headerInfo.format = (AudioFormat)read_int16();
99  headerInfo.channels = read_int16();
100  headerInfo.sample_rate = read_int32();
101  headerInfo.byte_rate = read_int32();
102  headerInfo.block_align = read_int16();
103  headerInfo.bits_per_sample = read_int16();
104  if (headerInfo.format == (AudioFormat) 0xfffe) {
105  if (sublength < 28) {
106  // Insufficient data for waveformatex
107  break;
108  }
109  skip(8);
110  headerInfo.format = (AudioFormat)read_int32();
111  skip(sublength - 28);
112  } else {
113  skip(sublength - 16);
114  }
115  headerInfo.is_valid = true;
116  } else if (subtag == TAG('d', 'a', 't', 'a')) {
117  sound_pos = tell();
118  headerInfo.data_length = sublength;
119  if (!headerInfo.data_length || headerInfo.is_streamed) {
120  headerInfo.is_streamed = true;
121  logInfo();
122  return;
123  }
124  seek(sublength, SEEK_CUR);
125  } else {
126  skip(sublength);
127  }
128  length -= sublength;
129  }
130  if (length > 0) {
131  // Bad chunk?
132  seek(length, SEEK_CUR);
133  }
134  }
135  logInfo();
136  len = 0;
137  }
138 
140  bool isDataComplete() { return len == 44; }
141 
143  WAVAudioInfo &audioInfo() { return headerInfo; }
144 
147  headerInfo = info;
148  }
149 
151  void writeHeader(Print *out) {
152  SingleBuffer<uint8_t> buffer(50);
153  writeRiffHeader(buffer);
154  writeFMT(buffer);
155  writeDataHeader(buffer);
156  len = buffer.available();
157  out->write(buffer.data(), buffer.available());
158  }
159 
160  protected:
161  struct WAVAudioInfo headerInfo;
162  uint8_t buffer[44];
163  size_t len = 0;
164  size_t data_pos = 0;
165  size_t sound_pos = 0;
166 
167  uint32_t read_tag() {
168  uint32_t tag = 0;
169  tag = (tag << 8) | getChar();
170  tag = (tag << 8) | getChar();
171  tag = (tag << 8) | getChar();
172  tag = (tag << 8) | getChar();
173  return tag;
174  }
175 
176  uint32_t getChar32() { return getChar(); }
177 
178  uint32_t read_int32() {
179  uint32_t value = 0;
180  value |= getChar32() << 0;
181  value |= getChar32() << 8;
182  value |= getChar32() << 16;
183  value |= getChar32() << 24;
184  return value;
185  }
186 
187  uint16_t read_int16() {
188  uint16_t value = 0;
189  value |= getChar() << 0;
190  value |= getChar() << 8;
191  return value;
192  }
193 
194  void skip(int n) {
195  int i;
196  for (i = 0; i < n; i++) getChar();
197  }
198 
199  int getChar() {
200  if (data_pos < len)
201  return buffer[data_pos++];
202  else
203  return -1;
204  }
205 
206  void seek(long int offset, int origin) {
207  if (origin == SEEK_SET) {
208  data_pos = offset;
209  } else if (origin == SEEK_CUR) {
210  data_pos += offset;
211  }
212  }
213 
214  size_t tell() { return data_pos; }
215 
216  bool eof() { return data_pos >= len - 1; }
217 
218  void logInfo() {
219  LOGI("WAVHeader sound_pos: %lu", (unsigned long)sound_pos);
220  LOGI("WAVHeader channels: %d ", headerInfo.channels);
221  LOGI("WAVHeader bits_per_sample: %d", headerInfo.bits_per_sample);
222  LOGI("WAVHeader sample_rate: %d ", headerInfo.sample_rate);
223  LOGI("WAVHeader format: %d", (int)headerInfo.format);
224  }
225 
226  void writeRiffHeader(BaseBuffer<uint8_t> &buffer) {
227  buffer.writeArray((uint8_t *)"RIFF", 4);
228  write32(buffer, headerInfo.file_size - 8);
229  buffer.writeArray((uint8_t *)"WAVE", 4);
230  }
231 
232  void writeFMT(BaseBuffer<uint8_t> &buffer) {
233  uint16_t fmt_len = 16;
234  buffer.writeArray((uint8_t *)"fmt ", 4);
235  write32(buffer, fmt_len);
236  write16(buffer, (uint16_t)headerInfo.format); // PCM
237  write16(buffer, headerInfo.channels);
238  write32(buffer, headerInfo.sample_rate);
239  write32(buffer, headerInfo.byte_rate);
240  write16(buffer, headerInfo.block_align); // frame size
241  write16(buffer, headerInfo.bits_per_sample);
242  }
243 
244  void write32(BaseBuffer<uint8_t> &buffer, uint64_t value) {
245  buffer.writeArray((uint8_t *)&value, 4);
246  }
247 
248  void write16(BaseBuffer<uint8_t> &buffer, uint16_t value) {
249  buffer.writeArray((uint8_t *)&value, 2);
250  }
251 
252  void writeDataHeader(BaseBuffer<uint8_t> &buffer) {
253  buffer.writeArray((uint8_t *)"data", 4);
254  write32(buffer, headerInfo.file_size);
255  int offset = headerInfo.offset;
256  if (offset > 0) {
257  uint8_t empty[offset];
258  memset(empty, 0, offset);
259  buffer.writeArray(empty, offset); // resolve issue with wrong aligment
260  }
261  }
262 
263 };
264 
275 class WAVDecoder : public AudioDecoder {
276  public:
280  WAVDecoder() = default;
281 
287  setDecoder(dec, fmt);
288  }
289 
292  TRACED();
293  decoder_format = fmt;
294  p_decoder = &dec;
295  }
296 
298  void setOutput(Print &out_stream) { this->p_print = &out_stream; }
299 
300  bool begin() {
301  TRACED();
302  setupEncodedAudio();
303  buffer24.reset();
304  isFirst = true;
305  active = true;
306  return true;
307  }
308 
309  void end() {
310  TRACED();
311  buffer24.reset();
312  active = false;
313  }
314 
315  const char *mime() { return wav_mime; }
316 
317  WAVAudioInfo &audioInfoEx() { return header.audioInfo(); }
318 
319  AudioInfo audioInfo() override { return header.audioInfo(); }
320 
321  virtual size_t write(const void *in_ptr, size_t in_size) {
322  TRACED();
323  size_t result = 0;
324  if (active) {
325  if (isFirst) {
326  result = decodeHeader((uint8_t*) in_ptr, in_size);
327  if (result<in_size){
328  result += write_out((uint8_t *)in_ptr+result, in_size-result);
329  }
330  } else if (isValid) {
331  result = write_out((uint8_t *)in_ptr, in_size);
332  }
333  }
334  return result;
335  }
336 
337  virtual operator bool() { return active; }
338 
339  protected:
340  WAVHeader header;
341  bool isFirst = true;
342  bool isValid = true;
343  bool active = false;
344  AudioFormat decoder_format = AudioFormat::PCM;
345  AudioDecoderExt *p_decoder = nullptr;
346  EncodedAudioOutput dec_out;
347  SingleBuffer<uint8_t> buffer24;
348 
349  Print& out() {
350  return p_decoder==nullptr ? *p_print : dec_out;
351  }
352 
353  virtual size_t write_out(const uint8_t *in_ptr, size_t in_size) {
354  // check if we need to convert int24 data from 3 bytes to 4 bytes
355  size_t result = 0;
356  if (header.audioInfo().bits_per_sample == 24 && sizeof(int24_t)==4){
357  write_out_24(in_ptr, in_size);
358  result = in_size;
359  } else {
360  result = out().write(in_ptr, in_size);
361  }
362  return result;
363  }
364 
365  // convert int24 to int32
// Collects the input bytes in buffer24 and, whenever a full frame
// (channels * 3 bytes) is available, expands each 3 byte sample with
// interpret24bitAsInt32() and writes the int32_t frame to out().
// Returns the number of bytes written to the output, which is not the
// number of input bytes consumed.
// NOTE(review): bytes of an incomplete frame appear to be carried over to the
// next call via buffer24 - this relies on resize() keeping the content; confirm.
366  size_t write_out_24(const uint8_t *in_ptr, size_t in_size) {
367  // make sure we can store a frame of 24bit (3bytes)
368  AudioInfo& info = header.audioInfo();
369  // in_size might be not a multiple of 3, so we use a buffer for a single frame
370  buffer24.resize(info.channels*3);
371  int result = 0;
// one converted sample per channel; array length is only known at run time
372  int32_t frame[info.channels];
373  uint8_t val24[3]={0};
374 
375  // add all bytes to buffer
376  for (int j=0;j<in_size;j++){
377  buffer24.write(in_ptr[j]);
378  // if buffer is full convert and output
379  if (buffer24.availableForWrite()==0){
380  for (int ch=0;ch<info.channels;ch++){
381  buffer24.readArray((uint8_t*)&val24[0], 3);
382  frame[ch] = interpret24bitAsInt32(val24);
383  //LOGW("%d", frame[ch]);
384  }
// all bytes of the frame must have been consumed before the buffer is reused
385  assert(buffer24.available()==0);
386  buffer24.reset();
387  size_t written = out().write((uint8_t*)frame,sizeof(frame));
388  assert(written==sizeof(frame));
389  result += written;
390  }
391  }
392  return result;
393  }
394 
/// Converts a little-endian 3 byte sample into an int32_t: the 24 bits are
/// placed in the upper three bytes, the lowest byte is 0.
int32_t interpret24bitAsInt32(uint8_t* byteArray) {
  uint32_t bits = 0;
  // start with the most significant byte and shift everything up by 8
  for (int idx = 2; idx >= 0; --idx) {
    bits = (bits | byteArray[idx]) << 8;
  }
  return static_cast<int32_t>(bits);
}
402 
403 
404  int decodeHeader(uint8_t *in_ptr, size_t in_size) {
405  int result = 0;
406  // we expect at least the full header
407  int written = header.write(in_ptr, in_size);
408  if (!header.isDataComplete()) {
409  return written;
410  }
411  // parse header
412  header.parse();
413 
414  size_t len = in_size - written;
415  uint8_t *sound_ptr = (uint8_t *)in_ptr + written;
416  isFirst = false;
417  isValid = header.audioInfo().is_valid;
418 
419  LOGI("WAV sample_rate: %d", header.audioInfo().sample_rate);
420  LOGI("WAV data_length: %u", (unsigned)header.audioInfo().data_length);
421  LOGI("WAV is_streamed: %d", header.audioInfo().is_streamed);
422  LOGI("WAV is_valid: %s",
423  header.audioInfo().is_valid ? "true" : "false");
424 
425  // check format
426  AudioFormat format = header.audioInfo().format;
427  isValid = format == decoder_format;
428  if (isValid) {
429  // update blocksize
430  if(p_decoder!=nullptr){
431  int block_size = header.audioInfo().block_align;
432  p_decoder->setBlockSize(block_size);
433  }
434 
435  // update sampling rate if the target supports it
436  AudioInfo bi;
437  bi.sample_rate = header.audioInfo().sample_rate;
438  bi.channels = header.audioInfo().channels;
439  bi.bits_per_sample = header.audioInfo().bits_per_sample;
440  notifyAudioChange(bi);
441  // write prm data from first record
442  LOGI("WAVDecoder writing first sound data");
443  result = out().write(sound_ptr, len);
444  } else {
445  LOGE("WAV format not supported: %d", (int)format);
446  }
447  return result;
448  }
449 
450  void setupEncodedAudio() {
451  if (p_decoder!=nullptr){
452  assert(p_print!=nullptr);
453  dec_out.setOutput(p_print);
454  dec_out.setDecoder(p_decoder);
455  dec_out.begin();
456  }
457  }
458 };
459 
469 class WAVEncoder : public AudioEncoder {
470  public:
474  WAVEncoder() = default;
475 
480  setEncoder(enc, fmt);
481  };
482 
483  void setEncoder(AudioEncoderExt &enc, AudioFormat fmt) {
484  TRACED();
485  audioInfo.format = fmt;
486  p_encoder = &enc;
487  }
488 
490  void setOutput(Print &out) override {
491  TRACED();
492  p_print = &out;
493  }
494 
496  const char *mime() override { return wav_mime; }
497 
498  // Provides the default configuration
499  WAVAudioInfo defaultConfig() {
500  WAVAudioInfo info;
501  info.format = AudioFormat::PCM;
502  info.sample_rate = DEFAULT_SAMPLE_RATE;
503  info.bits_per_sample = DEFAULT_BITS_PER_SAMPLE;
504  info.channels = DEFAULT_CHANNELS;
505  info.is_streamed = true;
506  info.is_valid = true;
507  info.data_length = 0x7fff0000;
508  info.file_size = info.data_length + 36;
509  return info;
510  }
511 
513  virtual void setAudioInfo(AudioInfo from) override {
514  audioInfo.sample_rate = from.sample_rate;
515  audioInfo.channels = from.channels;
516  audioInfo.bits_per_sample = from.bits_per_sample;
517  // recalculate byte rate, block align...
518  setAudioInfo(audioInfo);
519  }
520 
// Stores the WAVAudioInfo and recalculates the derived values: byte_rate,
// block_align (PCM only) and the streaming state / size limit.
// NOTE(review): listing line 523 is not part of this view - the statement
// between the signature and the assignment is unknown; confirm.
522  virtual void setAudioInfo(WAVAudioInfo ai) {
524  audioInfo = ai;
525  LOGI("sample_rate: %d", audioInfo.sample_rate);
526  LOGI("channels: %d", audioInfo.channels);
527  // bytes per second
528  audioInfo.byte_rate = audioInfo.sample_rate * audioInfo.channels * audioInfo.bits_per_sample / 8;
// for other formats block_align is left untouched here
529  if (audioInfo.format == AudioFormat::PCM){
530  audioInfo.block_align = audioInfo.bits_per_sample / 8 * audioInfo.channels;
531  }
// a missing or very large data length is handled as stream without size limit
532  if (audioInfo.is_streamed || audioInfo.data_length == 0 ||
533  audioInfo.data_length >= 0x7fff0000) {
534  LOGI("is_streamed! because length is %u",
535  (unsigned)audioInfo.data_length);
536  audioInfo.is_streamed = true;
537  audioInfo.data_length = ~0;
538  } else {
// write() stops accepting data after size_limit bytes
539  size_limit = audioInfo.data_length;
540  LOGI("size_limit is %d", (int)size_limit);
541  }
542  }
543 
545  bool begin(WAVAudioInfo ai) {
546  setAudioInfo(ai);
547  return begin();
548  }
549 
551  virtual bool begin() override {
552  TRACED();
553  setupEncodedAudio();
554  header_written = false;
555  is_open = true;
556  return true;
557  }
558 
560  void end() override { is_open = false; }
561 
563  virtual size_t write(const void *in_ptr, size_t in_size) override {
564  if (!is_open) {
565  LOGE("The WAVEncoder is not open - please call begin()");
566  return 0;
567  }
568 
569  if (p_print == nullptr) {
570  LOGE("No output stream was provided");
571  return 0;
572  }
573 
574  if (!header_written) {
575  LOGI("Writing Header");
576  header.setAudioInfo(audioInfo);
577  header.writeHeader(p_print);
578  audioInfo.file_size -= 44;
579  header_written = true;
580  }
581 
582  int32_t result = 0;
583  Print *p_out = p_encoder==nullptr ? p_print : &enc_out;;
584  if (audioInfo.is_streamed) {
585  result = p_out->write((uint8_t *)in_ptr, in_size);
586  } else if (size_limit > 0) {
587  size_t write_size = min((size_t)in_size, (size_t)size_limit);
588  result = p_out->write((uint8_t *)in_ptr, write_size);
589  size_limit -= result;
590 
591  if (size_limit <= 0) {
592  LOGI("The defined size was written - so we close the WAVEncoder now");
593  is_open = false;
594  }
595  }
596  return result;
597  }
598 
599  operator bool() override { return is_open; }
600 
601  bool isOpen() { return is_open; }
602 
604  void setDataOffset(uint16_t offset) { audioInfo.offset = offset; }
605 
606  protected:
607  WAVHeader header;
608  Print *p_print = nullptr; // final output CopyEncoder copy; // used for PCM
609  AudioEncoderExt *p_encoder = nullptr;
610  EncodedAudioOutput enc_out;
611  WAVAudioInfo audioInfo = defaultConfig();
612  int64_t size_limit = 0;
613  bool header_written = false;
614  volatile bool is_open = false;
615 
616  void setupEncodedAudio() {
617  if (p_encoder!=nullptr){
618  assert(p_print!=nullptr);
619  enc_out.setOutput(p_print);
620  enc_out.setEncoder(p_encoder);
621  enc_out.setAudioInfo(audioInfo);
622  enc_out.begin();
623  // block size only available after begin(): update block size
624  audioInfo.block_align = p_encoder->blockSize();
625  }
626  }
627 };
628 
629 } // namespace audio_tools
WAV Audio Formats used by Microsoft e.g. in AVI video files.
Definition: AudioEncoded.h:102
Decoding of encoded audio into PCM data.
Definition: AudioEncoded.h:18
Definition: AudioEncoded.h:107
Encoding of PCM data.
Definition: AudioEncoded.h:85
void setAudioInfo(AudioInfo from) override
Defines the sample rate, number of channels and bits per sample.
Definition: AudioEncoded.h:94
A more natural Print class to process encoded data (aac, wav, mp3...). Just define the output and the...
Definition: AudioEncoded.h:264
bool begin() override
Starts the processing - sets the status to active.
Definition: AudioEncoded.h:400
void setOutput(Print *outputStream)
Defines the output.
Definition: AudioEncoded.h:360
Definition: NoArduino.h:58
T * data()
Provides address of actual data.
Definition: Buffers.h:244
int available() override
provides the number of entries that are available to read
Definition: Buffers.h:211
void reset() override
clears the buffer
Definition: Buffers.h:246
A simple WAVDecoder: We parse the header data on the first record to determine the format....
Definition: CodecWAV.h:275
void setDecoder(AudioDecoderExt &dec, AudioFormat fmt)
Defines an optional decoder if the format is not PCM.
Definition: CodecWAV.h:291
void setOutput(Print &out_stream)
Defines the output Stream.
Definition: CodecWAV.h:298
WAVDecoder()=default
Construct a new WAVDecoder object for PCM data.
WAVDecoder(AudioDecoderExt &dec, AudioFormat fmt)
Construct a new WAVDecoder object for ADPCM data.
Definition: CodecWAV.h:286
A simple WAV file encoder. If no AudioEncoderExt is specified the WAV file contains PCM data,...
Definition: CodecWAV.h:469
void setOutput(Print &out) override
Defines the output stream.
Definition: CodecWAV.h:490
virtual size_t write(const void *in_ptr, size_t in_size) override
Writes PCM data to be encoded as WAV.
Definition: CodecWAV.h:563
virtual bool begin() override
starts the processing using the actual WAVAudioInfo
Definition: CodecWAV.h:551
bool begin(WAVAudioInfo ai)
starts the processing
Definition: CodecWAV.h:545
const char * mime() override
Provides "audio/wav".
Definition: CodecWAV.h:496
void end() override
stops the processing
Definition: CodecWAV.h:560
virtual void setAudioInfo(WAVAudioInfo ai)
Defines the WAVAudioInfo.
Definition: CodecWAV.h:522
WAVEncoder()=default
Construct a new WAVEncoder object for PCM data.
void setDataOffset(uint16_t offset)
Adds n empty bytes at the beginning of the data.
Definition: CodecWAV.h:604
WAVEncoder(AudioEncoderExt &enc, AudioFormat fmt)
Construct a new WAVEncoder object for ADPCM data.
Definition: CodecWAV.h:479
virtual void setAudioInfo(AudioInfo from) override
Update actual WAVAudioInfo.
Definition: CodecWAV.h:513
Parser for Wav header data for details see https://de.wikipedia.org/wiki/RIFF_WAVE.
Definition: CodecWAV.h:47
bool isDataComplete()
Returns true if the header is complete (with 44 bytes)
Definition: CodecWAV.h:140
void setAudioInfo(WAVAudioInfo info)
Sets the info in the header.
Definition: CodecWAV.h:146
void writeHeader(Print *out)
Just write a wav header to the indicated output.
Definition: CodecWAV.h:151
int write(uint8_t *data, size_t data_len)
Adds data to the 44 byte wav header data buffer and make it available for parsing.
Definition: CodecWAV.h:52
void parse()
Call begin when header data is complete to parse the data.
Definition: CodecWAV.h:62
WAVAudioInfo & audioInfo()
provides the info from the header
Definition: CodecWAV.h:143
AudioFormat
Audio format codes used by Microsoft e.g. in avi or wav files.
Definition: AudioFormat.h:19
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition: AnalogAudio.h:10
Basic Audio information which drives e.g. I2S.
Definition: AudioTypes.h:48
int channels
Number of channels: 2=stereo, 1=mono.
Definition: AudioTypes.h:53
int bits_per_sample
Number of bits per sample (int16_t = 16 bits)
Definition: AudioTypes.h:55
int sample_rate
Sample Rate: e.g 44100.
Definition: AudioTypes.h:51
Sound information which is available in the WAV header.
Definition: CodecWAV.h:20