arduino-audio-tools
CodecWAV.h
1 #pragma once
2 
3 #include "AudioCodecs/AudioCodecsBase.h"
5 
6 
// Packs four characters into a 32 bit tag; the first character ends up in the
// most significant byte, which matches the byte order produced by read_tag().
#define TAG(a, b, c, d)                                                      \
  ((((((static_cast<uint32_t>(a) << 8) | static_cast<uint32_t>(b)) << 8) |   \
     static_cast<uint32_t>(c))                                               \
    << 8) |                                                                  \
   (d))
10 #define READ_BUFFER_SIZE 512
11 
12 namespace audio_tools {
13 
21  WAVAudioInfo() = default;
22  WAVAudioInfo(const AudioInfo &from) {
23  sample_rate = from.sample_rate;
24  channels = from.channels;
26  }
27 
28  AudioFormat format = AudioFormat::PCM;
29  int byte_rate = 0;
30  int block_align = 0;
31  bool is_streamed = true;
32  bool is_valid = false;
33  uint32_t data_length = 0;
34  uint32_t file_size = 0;
35  int offset = 0;
36 };
37 
38 static const char *wav_mime = "audio/wav";
39 
47 class WAVHeader {
48  public:
49  WAVHeader() = default;
50 
52  int write(uint8_t *data, size_t data_len) {
53  int write_len = min(data_len, 44 - len);
54  memmove(buffer, data + len, write_len);
55  len += write_len;
56  LOGI("WAVHeader::write: %u -> %d -> %d", (unsigned)data_len, write_len,
57  (int)len);
58  return write_len;
59  }
60 
62  void parse() {
63  LOGI("WAVHeader::begin: %u", (unsigned)len);
64  this->data_pos = 0l;
65  memset((void *)&headerInfo, 0, sizeof(WAVAudioInfo));
66  while (!eof()) {
67  uint32_t tag, tag2, length;
68  tag = read_tag();
69  if (eof()) break;
70  length = read_int32();
71  if (!length || length >= 0x7fff0000) {
72  headerInfo.is_streamed = true;
73  length = ~0;
74  }
75  if (tag != TAG('R', 'I', 'F', 'F') || length < 4) {
76  seek(length, SEEK_CUR);
77  continue;
78  }
79  headerInfo.file_size = length;
80  tag2 = read_tag();
81  length -= 4;
82  if (tag2 != TAG('W', 'A', 'V', 'E')) {
83  seek(length, SEEK_CUR);
84  continue;
85  }
86  // RIFF chunk found, iterate through it
87  while (length >= 8) {
88  uint32_t subtag, sublength;
89  subtag = read_tag();
90  if (eof()) break;
91  sublength = read_int32();
92  length -= 8;
93  if (length < sublength) break;
94  if (subtag == TAG('f', 'm', 't', ' ')) {
95  if (sublength < 16) {
96  // Insufficient data for 'fmt '
97  break;
98  }
99  headerInfo.format = (AudioFormat)read_int16();
100  headerInfo.channels = read_int16();
101  headerInfo.sample_rate = read_int32();
102  headerInfo.byte_rate = read_int32();
103  headerInfo.block_align = read_int16();
104  headerInfo.bits_per_sample = read_int16();
105  if (headerInfo.format == (AudioFormat) 0xfffe) {
106  if (sublength < 28) {
107  // Insufficient data for waveformatex
108  break;
109  }
110  skip(8);
111  headerInfo.format = (AudioFormat)read_int32();
112  skip(sublength - 28);
113  } else {
114  skip(sublength - 16);
115  }
116  headerInfo.is_valid = true;
117  } else if (subtag == TAG('d', 'a', 't', 'a')) {
118  sound_pos = tell();
119  headerInfo.data_length = sublength;
120  if (!headerInfo.data_length || headerInfo.is_streamed) {
121  headerInfo.is_streamed = true;
122  logInfo();
123  return;
124  }
125  seek(sublength, SEEK_CUR);
126  } else {
127  skip(sublength);
128  }
129  length -= sublength;
130  }
131  if (length > 0) {
132  // Bad chunk?
133  seek(length, SEEK_CUR);
134  }
135  }
136  logInfo();
137  len = 0;
138  }
139 
141  bool isDataComplete() { return len == 44; }
142 
144  WAVAudioInfo &audioInfo() { return headerInfo; }
145 
148  headerInfo = info;
149  }
150 
152  void writeHeader(Print *out) {
153  SingleBuffer<uint8_t> buffer(50);
154  writeRiffHeader(buffer);
155  writeFMT(buffer);
156  writeDataHeader(buffer);
157  len = buffer.available();
158  out->write(buffer.data(), buffer.available());
159  }
160 
161  protected:
162  struct WAVAudioInfo headerInfo;
163  uint8_t buffer[44];
164  size_t len = 0;
165  size_t data_pos = 0;
166  size_t sound_pos = 0;
167 
168  uint32_t read_tag() {
169  uint32_t tag = 0;
170  tag = (tag << 8) | getChar();
171  tag = (tag << 8) | getChar();
172  tag = (tag << 8) | getChar();
173  tag = (tag << 8) | getChar();
174  return tag;
175  }
176 
177  uint32_t getChar32() { return getChar(); }
178 
179  uint32_t read_int32() {
180  uint32_t value = 0;
181  value |= getChar32() << 0;
182  value |= getChar32() << 8;
183  value |= getChar32() << 16;
184  value |= getChar32() << 24;
185  return value;
186  }
187 
188  uint16_t read_int16() {
189  uint16_t value = 0;
190  value |= getChar() << 0;
191  value |= getChar() << 8;
192  return value;
193  }
194 
195  void skip(int n) {
196  int i;
197  for (i = 0; i < n; i++) getChar();
198  }
199 
200  int getChar() {
201  if (data_pos < len)
202  return buffer[data_pos++];
203  else
204  return -1;
205  }
206 
207  void seek(long int offset, int origin) {
208  if (origin == SEEK_SET) {
209  data_pos = offset;
210  } else if (origin == SEEK_CUR) {
211  data_pos += offset;
212  }
213  }
214 
215  size_t tell() { return data_pos; }
216 
217  bool eof() { return data_pos >= len - 1; }
218 
219  void logInfo() {
220  LOGI("WAVHeader sound_pos: %lu", (unsigned long)sound_pos);
221  LOGI("WAVHeader channels: %d ", headerInfo.channels);
222  LOGI("WAVHeader bits_per_sample: %d", headerInfo.bits_per_sample);
223  LOGI("WAVHeader sample_rate: %d ", (int) headerInfo.sample_rate);
224  LOGI("WAVHeader format: %d", (int)headerInfo.format);
225  }
226 
227  void writeRiffHeader(BaseBuffer<uint8_t> &buffer) {
228  buffer.writeArray((uint8_t *)"RIFF", 4);
229  write32(buffer, headerInfo.file_size - 8);
230  buffer.writeArray((uint8_t *)"WAVE", 4);
231  }
232 
233  void writeFMT(BaseBuffer<uint8_t> &buffer) {
234  uint16_t fmt_len = 16;
235  buffer.writeArray((uint8_t *)"fmt ", 4);
236  write32(buffer, fmt_len);
237  write16(buffer, (uint16_t)headerInfo.format); // PCM
238  write16(buffer, headerInfo.channels);
239  write32(buffer, headerInfo.sample_rate);
240  write32(buffer, headerInfo.byte_rate);
241  write16(buffer, headerInfo.block_align); // frame size
242  write16(buffer, headerInfo.bits_per_sample);
243  }
244 
245  void write32(BaseBuffer<uint8_t> &buffer, uint64_t value) {
246  buffer.writeArray((uint8_t *)&value, 4);
247  }
248 
249  void write16(BaseBuffer<uint8_t> &buffer, uint16_t value) {
250  buffer.writeArray((uint8_t *)&value, 2);
251  }
252 
253  void writeDataHeader(BaseBuffer<uint8_t> &buffer) {
254  buffer.writeArray((uint8_t *)"data", 4);
255  write32(buffer, headerInfo.file_size);
256  int offset = headerInfo.offset;
257  if (offset > 0) {
258  uint8_t empty[offset];
259  memset(empty, 0, offset);
260  buffer.writeArray(empty, offset); // resolve issue with wrong aligment
261  }
262  }
263 
264 };
265 
276 class WAVDecoder : public AudioDecoder {
277  public:
281  WAVDecoder() = default;
282 
288  setDecoder(dec, fmt);
289  }
290 
293  TRACED();
294  decoder_format = fmt;
295  p_decoder = &dec;
296  }
297 
299  void setOutput(Print &out_stream) override { this->p_print = &out_stream; }
300 
301  bool begin() override {
302  TRACED();
303  setupEncodedAudio();
304  buffer24.reset();
305  isFirst = true;
306  active = true;
307  return true;
308  }
309 
310  void end() override {
311  TRACED();
312  buffer24.reset();
313  active = false;
314  }
315 
316  const char *mime() { return wav_mime; }
317 
318  WAVAudioInfo &audioInfoEx() { return header.audioInfo(); }
319 
320  AudioInfo audioInfo() override { return header.audioInfo(); }
321 
322  virtual size_t write(const uint8_t *data, size_t len) override {
323  TRACED();
324  size_t result = 0;
325  if (active) {
326  if (isFirst) {
327  result = decodeHeader((uint8_t*) data, len);
328  if (result<len){
329  result += write_out((uint8_t *)data+result, len-result);
330  }
331  } else if (isValid) {
332  result = write_out((uint8_t *)data, len);
333  }
334  }
335  return result;
336  }
337 
338  virtual operator bool() override { return active; }
339 
340  protected:
341  WAVHeader header;
342  bool isFirst = true;
343  bool isValid = true;
344  bool active = false;
345  AudioFormat decoder_format = AudioFormat::PCM;
346  AudioDecoderExt *p_decoder = nullptr;
347  EncodedAudioOutput dec_out;
348  SingleBuffer<uint8_t> buffer24;
349 
350  Print& out() {
351  return p_decoder==nullptr ? *p_print : dec_out;
352  }
353 
354  virtual size_t write_out(const uint8_t *in_ptr, size_t in_size) {
355  // check if we need to convert int24 data from 3 bytes to 4 bytes
356  size_t result = 0;
357  if (header.audioInfo().bits_per_sample == 24 && sizeof(int24_t)==4){
358  write_out_24(in_ptr, in_size);
359  result = in_size;
360  } else {
361  result = out().write(in_ptr, in_size);
362  }
363  return result;
364  }
365 
366  // convert int24 to int32
367  size_t write_out_24(const uint8_t *in_ptr, size_t in_size) {
368  // make sure we can store a frame of 24bit (3bytes)
369  AudioInfo& info = header.audioInfo();
370  // in_size might be not a multiple of 3, so we use a buffer for a single frame
371  buffer24.resize(info.channels*3);
372  int result = 0;
373  int32_t frame[info.channels];
374  uint8_t val24[3]={0};
375 
376  // add all bytes to buffer
377  for (int j=0;j<in_size;j++){
378  buffer24.write(in_ptr[j]);
379  // if buffer is full convert and output
380  if (buffer24.availableForWrite()==0){
381  for (int ch=0;ch<info.channels;ch++){
382  buffer24.readArray((uint8_t*)&val24[0], 3);
383  frame[ch] = interpret24bitAsInt32(val24);
384  //LOGW("%d", frame[ch]);
385  }
386  assert(buffer24.available()==0);
387  buffer24.reset();
388  size_t written = out().write((uint8_t*)frame,sizeof(frame));
389  assert(written==sizeof(frame));
390  result += written;
391  }
392  }
393  return result;
394  }
395 
/// Places the 3 bytes of a sample (least significant byte first) into the
/// upper 3 bytes of an int32_t; the lowest byte of the result is 0
int32_t interpret24bitAsInt32(uint8_t* byteArray) {
  int32_t value = 0;
  value |= static_cast<int32_t>(byteArray[0]) << 8;
  value |= static_cast<int32_t>(byteArray[1]) << 16;
  value |= static_cast<int32_t>(byteArray[2]) << 24;
  return value;
}
403 
404 
405  int decodeHeader(uint8_t *in_ptr, size_t in_size) {
406  int result = 0;
407  // we expect at least the full header
408  int written = header.write(in_ptr, in_size);
409  if (!header.isDataComplete()) {
410  return written;
411  }
412  // parse header
413  header.parse();
414 
415  size_t len = in_size - written;
416  uint8_t *sound_ptr = (uint8_t *)in_ptr + written;
417  isFirst = false;
418  isValid = header.audioInfo().is_valid;
419 
420  LOGI("WAV sample_rate: %d", (int) header.audioInfo().sample_rate);
421  LOGI("WAV data_length: %u", (unsigned)header.audioInfo().data_length);
422  LOGI("WAV is_streamed: %d", header.audioInfo().is_streamed);
423  LOGI("WAV is_valid: %s",
424  header.audioInfo().is_valid ? "true" : "false");
425 
426  // check format
427  AudioFormat format = header.audioInfo().format;
428  isValid = format == decoder_format;
429  if (isValid) {
430  // update blocksize
431  if(p_decoder!=nullptr){
432  int block_size = header.audioInfo().block_align;
433  p_decoder->setBlockSize(block_size);
434  }
435 
436  // update sampling rate if the target supports it
437  AudioInfo bi;
438  bi.sample_rate = header.audioInfo().sample_rate;
439  bi.channels = header.audioInfo().channels;
440  bi.bits_per_sample = header.audioInfo().bits_per_sample;
441  notifyAudioChange(bi);
442  // write prm data from first record
443  LOGI("WAVDecoder writing first sound data");
444  result = out().write(sound_ptr, len);
445  } else {
446  LOGE("WAV format not supported: %d", (int)format);
447  }
448  return result;
449  }
450 
451  void setupEncodedAudio() {
452  if (p_decoder!=nullptr){
453  assert(p_print!=nullptr);
454  dec_out.setOutput(p_print);
455  dec_out.setDecoder(p_decoder);
456  dec_out.begin();
457  }
458  }
459 };
460 
470 class WAVEncoder : public AudioEncoder {
471  public:
475  WAVEncoder() = default;
476 
481  setEncoder(enc, fmt);
482  };
483 
484  void setEncoder(AudioEncoderExt &enc, AudioFormat fmt) {
485  TRACED();
486  audioInfo.format = fmt;
487  p_encoder = &enc;
488  }
489 
491  void setOutput(Print &out) override {
492  TRACED();
493  p_print = &out;
494  }
495 
497  const char *mime() override { return wav_mime; }
498 
499  // Provides the default configuration
500  WAVAudioInfo defaultConfig() {
501  WAVAudioInfo info;
502  info.format = AudioFormat::PCM;
503  info.sample_rate = DEFAULT_SAMPLE_RATE;
504  info.bits_per_sample = DEFAULT_BITS_PER_SAMPLE;
505  info.channels = DEFAULT_CHANNELS;
506  info.is_streamed = true;
507  info.is_valid = true;
508  info.data_length = 0x7fff0000;
509  info.file_size = info.data_length + 36;
510  return info;
511  }
512 
514  virtual void setAudioInfo(AudioInfo from) override {
515  audioInfo.sample_rate = from.sample_rate;
516  audioInfo.channels = from.channels;
517  audioInfo.bits_per_sample = from.bits_per_sample;
518  // recalculate byte rate, block align...
519  setAudioInfo(audioInfo);
520  }
521 
523  virtual void setAudioInfo(WAVAudioInfo ai) {
525  audioInfo = ai;
526  LOGI("sample_rate: %d", (int)audioInfo.sample_rate);
527  LOGI("channels: %d", audioInfo.channels);
528  // bytes per second
529  audioInfo.byte_rate = audioInfo.sample_rate * audioInfo.channels * audioInfo.bits_per_sample / 8;
530  if (audioInfo.format == AudioFormat::PCM){
531  audioInfo.block_align = audioInfo.bits_per_sample / 8 * audioInfo.channels;
532  }
533  if (audioInfo.is_streamed || audioInfo.data_length == 0 ||
534  audioInfo.data_length >= 0x7fff0000) {
535  LOGI("is_streamed! because length is %u",
536  (unsigned)audioInfo.data_length);
537  audioInfo.is_streamed = true;
538  audioInfo.data_length = ~0;
539  } else {
540  size_limit = audioInfo.data_length;
541  LOGI("size_limit is %d", (int)size_limit);
542  }
543  }
544 
546  bool begin(WAVAudioInfo ai) {
547  setAudioInfo(ai);
548  return begin();
549  }
550 
552  virtual bool begin() override {
553  TRACED();
554  setupEncodedAudio();
555  header_written = false;
556  is_open = true;
557  return true;
558  }
559 
561  void end() override { is_open = false; }
562 
564  virtual size_t write(const uint8_t *data, size_t len) override {
565  if (!is_open) {
566  LOGE("The WAVEncoder is not open - please call begin()");
567  return 0;
568  }
569 
570  if (p_print == nullptr) {
571  LOGE("No output stream was provided");
572  return 0;
573  }
574 
575  if (!header_written) {
576  LOGI("Writing Header");
577  header.setAudioInfo(audioInfo);
578  header.writeHeader(p_print);
579  audioInfo.file_size -= 44;
580  header_written = true;
581  }
582 
583  int32_t result = 0;
584  Print *p_out = p_encoder==nullptr ? p_print : &enc_out;;
585  if (audioInfo.is_streamed) {
586  result = p_out->write((uint8_t *)data, len);
587  } else if (size_limit > 0) {
588  size_t write_size = min((size_t)len, (size_t)size_limit);
589  result = p_out->write((uint8_t *)data, write_size);
590  size_limit -= result;
591 
592  if (size_limit <= 0) {
593  LOGI("The defined size was written - so we close the WAVEncoder now");
594  is_open = false;
595  }
596  }
597  return result;
598  }
599 
600  operator bool() override { return is_open; }
601 
602  bool isOpen() { return is_open; }
603 
605  void setDataOffset(uint16_t offset) { audioInfo.offset = offset; }
606 
607  protected:
608  WAVHeader header;
609  Print *p_print = nullptr; // final output CopyEncoder copy; // used for PCM
610  AudioEncoderExt *p_encoder = nullptr;
611  EncodedAudioOutput enc_out;
612  WAVAudioInfo audioInfo = defaultConfig();
613  int64_t size_limit = 0;
614  bool header_written = false;
615  volatile bool is_open = false;
616 
617  void setupEncodedAudio() {
618  if (p_encoder!=nullptr){
619  assert(p_print!=nullptr);
620  enc_out.setOutput(p_print);
621  enc_out.setEncoder(p_encoder);
622  enc_out.setAudioInfo(audioInfo);
623  enc_out.begin();
624  // block size only available after begin(): update block size
625  audioInfo.block_align = p_encoder->blockSize();
626  }
627  }
628 };
629 
630 } // namespace audio_tools
WAV Audio Formats used by Microsoft e.g. in AVI video files.
Definition: AudioCodecsBase.h:103
Decoding of encoded audio into PCM data.
Definition: AudioCodecsBase.h:16
Definition: AudioCodecsBase.h:108
Encoding of PCM data.
Definition: AudioCodecsBase.h:87
void setAudioInfo(AudioInfo from) override
Defines the sample rate, number of channels and bits per sample.
Definition: AudioCodecsBase.h:96
A more natural Print class to process encoded data (aac, wav, mp3...). Just define the output and the...
Definition: AudioEncoded.h:21
bool begin() override
Starts the processing - sets the status to active.
Definition: AudioEncoded.h:136
virtual void setAudioInfo(AudioInfo newInfo) override
Defines the input AudioInfo.
Definition: AudioEncoded.h:87
void setOutput(Print &outputStream)
Defines/Changes the output target.
Definition: AudioEncoded.h:96
Definition: NoArduino.h:58
T * data()
Provides address of actual data.
Definition: Buffers.h:246
int available() override
provides the number of entries that are available to read
Definition: Buffers.h:213
void reset() override
clears the buffer
Definition: Buffers.h:248
A simple WAVDecoder: We parse the header data on the first record to determine the format....
Definition: CodecWAV.h:276
void setOutput(Print &out_stream) override
Defines the output Stream.
Definition: CodecWAV.h:299
void setDecoder(AudioDecoderExt &dec, AudioFormat fmt)
Defines an optional decoder if the format is not PCM.
Definition: CodecWAV.h:292
WAVDecoder()=default
Construct a new WAVDecoder object for PCM data.
WAVDecoder(AudioDecoderExt &dec, AudioFormat fmt)
Construct a new WAVDecoder object for ADPCM data.
Definition: CodecWAV.h:287
AudioInfo audioInfo() override
provides the actual input AudioInfo
Definition: CodecWAV.h:320
A simple WAV file encoder. If no AudioEncoderExt is specified the WAV file contains PCM data,...
Definition: CodecWAV.h:470
virtual size_t write(const uint8_t *data, size_t len) override
Writes PCM data to be encoded as WAV.
Definition: CodecWAV.h:564
void setOutput(Print &out) override
Defines the output stream.
Definition: CodecWAV.h:491
virtual bool begin() override
starts the processing using the actual WAVAudioInfo
Definition: CodecWAV.h:552
bool begin(WAVAudioInfo ai)
starts the processing
Definition: CodecWAV.h:546
const char * mime() override
Provides "audio/wav".
Definition: CodecWAV.h:497
void end() override
stops the processing
Definition: CodecWAV.h:561
virtual void setAudioInfo(WAVAudioInfo ai)
Defines the WAVAudioInfo.
Definition: CodecWAV.h:523
WAVEncoder()=default
Construct a new WAVEncoder object for PCM data.
void setDataOffset(uint16_t offset)
Adds n empty bytes at the beginning of the data.
Definition: CodecWAV.h:605
WAVEncoder(AudioEncoderExt &enc, AudioFormat fmt)
Construct a new WAVEncoder object for ADPCM data.
Definition: CodecWAV.h:480
virtual void setAudioInfo(AudioInfo from) override
Update actual WAVAudioInfo.
Definition: CodecWAV.h:514
Parser for Wav header data for details see https://de.wikipedia.org/wiki/RIFF_WAVE.
Definition: CodecWAV.h:47
bool isDataComplete()
Returns true if the header is complete (with 44 bytes)
Definition: CodecWAV.h:141
void setAudioInfo(WAVAudioInfo info)
Sets the info in the header.
Definition: CodecWAV.h:147
void writeHeader(Print *out)
Just write a wav header to the indicated output.
Definition: CodecWAV.h:152
int write(uint8_t *data, size_t data_len)
Adds data to the 44 byte wav header data buffer and make it available for parsing.
Definition: CodecWAV.h:52
void parse()
Call parse() once the header data is complete to parse the data.
Definition: CodecWAV.h:62
WAVAudioInfo & audioInfo()
provides the info from the header
Definition: CodecWAV.h:144
AudioFormat
Audio format codes used by Microsoft e.g. in avi or wav files.
Definition: AudioFormat.h:19
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition: AnalogAudioArduino.h:12
Basic Audio information which drives e.g. I2S.
Definition: AudioTypes.h:50
sample_rate_t sample_rate
Sample Rate: e.g 44100.
Definition: AudioTypes.h:53
uint16_t channels
Number of channels: 2=stereo, 1=mono.
Definition: AudioTypes.h:55
uint8_t bits_per_sample
Number of bits per sample (int16_t = 16 bits)
Definition: AudioTypes.h:57
Sound information which is available in the WAV header.
Definition: CodecWAV.h:20