arduino-audio-tools/_codec_wav_i_m_a_8h_source.html

#pragma once


#include "AudioTools/AudioCodecs/AudioCodecsBase.h"


/// Format code expected in the 'fmt ' chunk; any other value is rejected by the header parser.
#define WAVE_FORMAT_IMA_ADPCM 0x0011

/// Packs four characters into a 32 bit chunk tag with the first character in the most
/// significant byte (the same layout that WavIMAHeader::read_tag() produces).
/// Every argument is narrowed to uint8_t first so that a character >= 0x80 is not
/// sign-extended into the higher bytes on platforms where char is signed.
#define TAG(a, b, c, d)                                           \
    ((static_cast<uint32_t>(static_cast<uint8_t>(a)) << 24) |     \
     (static_cast<uint32_t>(static_cast<uint8_t>(b)) << 16) |     \
     (static_cast<uint32_t>(static_cast<uint8_t>(c)) << 8)  |     \
     (static_cast<uint32_t>(static_cast<uint8_t>(d))))

/// Size in bytes of the stack buffer used by WavIMADecoder::readStream().
#define READ_BUFFER_SIZE 512


namespace audio_tools {


/// Adjustment applied to the step index after each decoded sample.
/// It is indexed with the complete 4 bit ADPCM code, so the second half
/// (codes with the sign bit set) repeats the first half.
static const int16_t ima_index_table[16] = {
    /* codes 0..3   */ -1, -1, -1, -1,
    /* codes 4..7   */  2,  4,  6,  8,
    /* codes 8..11  */ -1, -1, -1, -1,
    /* codes 12..15 */  2,  4,  6,  8
};


/// Quantizer step sizes; the decoder selects an entry with the per channel
/// step index, which it keeps inside the range 0..88.
static const int32_t ima_step_table[89] = {
    /*  0 */ 7,     8,     9,     10,    11,    12,    13,    14,    16,    17,
    /* 10 */ 19,    21,    23,    25,    28,    31,    34,    37,    41,    45,
    /* 20 */ 50,    55,    60,    66,    73,    80,    88,    97,    107,   118,
    /* 30 */ 130,   143,   157,   173,   190,   209,   230,   253,   279,   307,
    /* 40 */ 337,   371,   408,   449,   494,   544,   598,   658,   724,   796,
    /* 50 */ 876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066,
    /* 60 */ 2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,
    /* 70 */ 5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899,
    /* 80 */ 15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
};


struct WavIMAAudioInfo : AudioInfo {

    WavIMAAudioInfo() = default;

    WavIMAAudioInfo(const AudioInfo& from) {

        sample_rate = from.sample_rate;

        channels = from.channels;

        bits_per_sample = from.bits_per_sample;

    }


    int format = WAVE_FORMAT_IMA_ADPCM;

    int byte_rate = 0;

    int block_align = 0;

    int frames_per_block = 0;

    int num_samples = 0;

    bool is_valid = false;

    uint32_t data_length = 0;

    uint32_t file_size = 0;

};

struct WavIMAAudioInfo : AudioInfo {…};


/// Decoder state that is kept separately for every channel.
struct IMAState {
    /// Last decoded sample value; the next sample is derived from it
    int32_t predictor{0};
    /// Current position in ima_step_table
    int step_index{0};
};

struct IMAState {…};


/// Mime type reported by WavIMADecoder::mime().
/// Declared static and const: a plain `const char*` variable has external linkage, so
/// defining it in a header produces one definition per translation unit that includes it.
static const char* const wav_ima_mime = "audio/x-wav";


/// Result of parsing a single chunk of the WAV header.
/// The negative values are also returned by WavIMAHeader::write() to signal an error.
enum chunk_result {
    IMA_ERR_INVALID_CHUNK     = -2,  ///< chunk too short or content not supported
    IMA_ERR_INVALID_CONTAINER = -1,  ///< not a RIFF / WAVE container
    IMA_CHUNK_OK              = 0,   ///< known chunk that was parsed successfully
    IMA_CHUNK_UNKNOWN         = 1    ///< chunk with a tag that the parser does not evaluate
};

typedef enum {…};


/**
 * @brief Incremental parser for the RIFF/WAVE header of an IMA ADPCM file.
 *
 * The raw file data is passed to write(), which may be called several times with
 * arbitrary amounts of data. Each chunk is collected in a small buffer (8 byte chunk
 * header plus at most 20 bytes of content) and evaluated by parseChunk(). Content
 * beyond these 20 bytes and the content of unknown chunks is skipped. Parsing stops
 * when the header of the 'data' chunk has been read.
 */
class WavIMAHeader  {
    public:
        WavIMAHeader() {
            clearHeader();
        };

        /// Resets the parser and the collected audio information so that a new header can be parsed.
        void clearHeader() {
            data_pos = 0;
            // The fields are cleared one by one: overwriting the whole object with memset()
            // is only safe as long as WavIMAAudioInfo and its base class stay trivially copyable.
            headerInfo.sample_rate = 0;
            headerInfo.channels = 0;
            headerInfo.bits_per_sample = 0;
            headerInfo.format = 0;
            headerInfo.byte_rate = 0;
            headerInfo.block_align = 0;
            headerInfo.frames_per_block = 0;
            headerInfo.num_samples = 0;
            headerInfo.is_valid = false;
            headerInfo.data_length = 0;
            headerInfo.file_size = 0;
            header_complete = false;
            chunk_len = 0;
            max_chunk_len = 8;
            skip_len = 0;
            isFirstChunk = true;
        }

        /**
         * @brief Evaluates the chunk that is currently stored in the chunk buffer.
         * @return IMA_CHUNK_OK for a known chunk, IMA_CHUNK_UNKNOWN for any other tag,
         *         IMA_ERR_INVALID_CONTAINER if the RIFF container is not of type WAVE and
         *         IMA_ERR_INVALID_CHUNK if the chunk is too short or the format is not supported.
         */
        chunk_result parseChunk() {
            data_pos = 0;
            bool chunkUnknown = false;
            const uint32_t tag = read_tag();
            const uint32_t length = read_int32();
            if (length < 4) {
                return IMA_ERR_INVALID_CHUNK;
            }

            if (tag == TAG('R', 'I', 'F', 'F')) {
                if (read_tag() != TAG('W', 'A', 'V', 'E')) {
                    return IMA_ERR_INVALID_CONTAINER;
                }
            } else if (tag == TAG('f', 'm', 't', ' ')) {
                if (length < 20) {
                    // Insufficient data for 'fmt '
                    return IMA_ERR_INVALID_CHUNK;
                }
                headerInfo.format          = read_int16();
                headerInfo.channels        = read_int16();
                headerInfo.sample_rate     = read_int32();
                headerInfo.byte_rate       = read_int32();
                headerInfo.block_align     = read_int16();
                headerInfo.bits_per_sample = read_int16();

                // Skip the size of the extra information: for IMA ADPCM it is expected to be
                // followed by exactly one 16 bit value.
                skip(2);
                headerInfo.frames_per_block = read_int16();

                // Only mono and stereo IMA ADPCM data can be decoded; a channel count of 0
                // is rejected as well because it does not describe any audio data.
                const bool formatOk = headerInfo.format == WAVE_FORMAT_IMA_ADPCM;
                const bool channelsOk = headerInfo.channels >= 1 && headerInfo.channels <= 2;
                if (!formatOk || !channelsOk) {
                    LOGE("Format not supported: %d, %d\n", headerInfo.format, headerInfo.channels);
                    return IMA_ERR_INVALID_CHUNK;
                }
                headerInfo.is_valid = true; // At this point we know that the format information is valid
            } else if (tag == TAG('f', 'a', 'c', 't')) {
                /* In the context of ADPCM the fact chunk should contain the total number of mono or
                   stereo samples. We do not rely on it because some programs (e.g. Audacity) write
                   an incorrect value in some cases. The value is currently not used by the decoder. */
                headerInfo.num_samples = read_int32();
            } else if (tag == TAG('d', 'a', 't', 'a')) {
                // Size of the data chunk.
                headerInfo.data_length = length;
            } else {
                chunkUnknown = true;
            }

            // Skip the part of the chunk that did not fit into the buffer. The content of a RIFF
            // chunk is padded to an even number of bytes and the pad byte is not part of the length
            // field, so it has to be skipped as well. Nothing is skipped for 'RIFF' (its content
            // are the following chunks) and for 'data' (its content is the audio data).
            if (tag != TAG('R', 'I', 'F', 'F') && tag != TAG('d', 'a', 't', 'a')) {
                skip_len = (length > 20) ? (size_t)(length - 20) : 0;
                skip_len += (length & 1);
            }
            return chunkUnknown ? IMA_CHUNK_UNKNOWN : IMA_CHUNK_OK;
        }

        /**
         * @brief Adds data to the header parser.
         *
         * Because the header isn't necessarily uniform, we go through each chunk individually
         * and only copy the ones we need.
         *
         * @param data     next bytes of the file
         * @param data_len number of bytes available in data
         * @return number of bytes that were consumed as header data or a negative value
         *         (IMA_ERR_INVALID_CONTAINER, IMA_ERR_INVALID_CHUNK) if the header is not valid
         */
        int write(uint8_t* data, size_t data_len) {
            int data_offset = 0;
            while (data_len > 0 && !header_complete) {
                if (skip_len > 0) {
                    /* Used to skip any unknown chunks or chunks that are longer than expected.
                       Some encoders like ffmpeg write meta information before the "data" chunk by default. */
                    const size_t skipped = min(skip_len, data_len);
                    skip_len -= skipped;
                    data_offset += skipped;
                    data_len -= skipped;
                    continue;
                }

                // Collect the data of the current chunk in the temporary buffer.
                const size_t copied = min(data_len, max_chunk_len - chunk_len);
                memmove(chunk_buffer + chunk_len, data + data_offset, copied);
                chunk_len += copied;
                data_offset += copied;
                data_len -= copied;
                if (chunk_len < max_chunk_len) {
                    // More data is needed before the buffer can be evaluated.
                    continue;
                }

                data_pos = 0;
                if (max_chunk_len == 8) {
                    // Only the chunk header (tag and length) is available so far.
                    const uint32_t chunk_tag = read_tag();
                    uint32_t chunk_size = read_int32();
                    if (isFirstChunk && chunk_tag != TAG('R', 'I', 'F', 'F')) {
                        headerInfo.is_valid = false;
                        return IMA_ERR_INVALID_CONTAINER;
                    }
                    isFirstChunk = false;
                    if (chunk_tag == TAG('R', 'I', 'F', 'F')) {
                        // Only the container type is read from the RIFF chunk.
                        chunk_size = 4;
                    } else if (chunk_tag == TAG('d', 'a', 't', 'a')) {
                        parseChunk();
                        header_complete = true;
                        logInfo();
                        break;
                    }

                    /* Wait for the rest of the data before processing the chunk.
                       The largest chunk we expect is the "fmt " chunk which is 20 bytes long in this case. */
                    const size_t content_len = min((size_t)chunk_size, (size_t)20);
                    if (content_len > 0) {
                        max_chunk_len += content_len;
                        continue;
                    }
                    // A chunk without content must not leave the buffer limit at 8 bytes: the same
                    // chunk header would be evaluated again and again without consuming any input.
                    // It is passed on to parseChunk() which reports it as invalid.
                }

                // Abort processing the header if the RIFF container or a required chunk is not valid
                const chunk_result result = parseChunk();
                if (result == IMA_ERR_INVALID_CONTAINER || result == IMA_ERR_INVALID_CHUNK) {
                    headerInfo.is_valid = false;
                    return result;
                }
                chunk_len = 0;
                max_chunk_len = 8;
            }
            return data_offset;
        }

        /// Returns true if the header is complete (the data chunk has been found)
        bool isDataComplete() {
            return header_complete;
        }

        /// Provides the audio information that has been collected so far
        WavIMAAudioInfo &audioInfo() {
            return headerInfo;
        }

    protected:
        struct WavIMAAudioInfo headerInfo;
        uint8_t chunk_buffer[28];     // 8 byte chunk header + max. 20 bytes of content
        size_t chunk_len = 0;         // number of bytes stored in chunk_buffer
        size_t max_chunk_len = 8;     // number of bytes to collect before the buffer is evaluated
        size_t skip_len = 0;          // number of input bytes that still have to be ignored
        size_t data_pos = 0;          // read position in chunk_buffer
        bool header_complete = false;
        bool isFirstChunk = true;

        /// Reads 4 bytes as tag: the first byte ends up in the most significant byte
        uint32_t read_tag() {
            uint32_t tag = getChar();
            tag = (tag << 8) | getChar();
            tag = (tag << 8) | getChar();
            tag = (tag << 8) | getChar();
            return tag;
        }

        /// Reads a 32 bit little endian value
        uint32_t read_int32() {
            uint32_t value = (uint32_t)getChar();
            value |= (uint32_t)getChar() << 8;
            value |= (uint32_t)getChar() << 16;
            value |= (uint32_t)getChar() << 24;
            return value;
        }

        /// Reads a 16 bit little endian value
        uint16_t read_int16() {
            // getChar() returns -1 at the end of the buffer: the results are narrowed to
            // uint8_t before shifting because shifting a negative value is not well defined.
            const uint16_t low = static_cast<uint8_t>(getChar());
            const uint16_t high = static_cast<uint8_t>(getChar());
            return static_cast<uint16_t>(low | (high << 8));
        }

        /// Moves the read position forward by n bytes but not beyond the buffered data
        void skip(int n) {
            data_pos += min((size_t)n, chunk_len - data_pos);
        }

        /// Provides the next byte of the chunk buffer or -1 if no more data is available
        int getChar() {
            if (data_pos < chunk_len) return chunk_buffer[data_pos++];
            else return -1;
        }

        /// Logs the most important values of the 'fmt ' chunk
        void logInfo() {
            LOGI("WavIMAHeader format: %d", headerInfo.format);
            LOGI("WavIMAHeader channels: %d", headerInfo.channels);
            LOGI("WavIMAHeader sample_rate: %d", headerInfo.sample_rate);
            LOGI("WavIMAHeader block align: %d", headerInfo.block_align);
            LOGI("WavIMAHeader bits_per_sample: %d", headerInfo.bits_per_sample);
        }
};

class WavIMAHeader {…};


class WavIMADecoder : public AudioDecoder {

    public:


        WavIMADecoder() {

            TRACED();

        }

        WavIMADecoder() {…}


        WavIMADecoder(Print &out_stream, bool active=true) {

            TRACED();

            this->out = &out_stream;

            this->active = active;

        }

        WavIMADecoder(Print &out_stream, bool active=true) {…}


        WavIMADecoder(Print &out_stream, AudioInfoSupport &bi) {

            TRACED();

            this->out = &out_stream;

            addNotifyAudioChange(bi);

        }

        WavIMADecoder(Print &out_stream, AudioInfoSupport &bi) {…}


        ~WavIMADecoder() {

            if (input_buffer != nullptr) delete[] input_buffer;

            if (output_buffer != nullptr) delete[] output_buffer;

        }


        void setOutput(Print &out_stream) {

            this->out = &out_stream;

        }

        void setOutput(Print &out_stream) {…}


        bool begin() {

            TRACED();

            ima_states[0].predictor = 0;

            ima_states[0].step_index = 0;

            ima_states[1].predictor = 0;

            ima_states[1].step_index = 0;

            isFirst = true;

            active = true;

            header.clearHeader();

            return true;

        }


        void end() {

            TRACED();

            active = false;

        }


        const char* mime() {

            return wav_ima_mime;

        }


        WavIMAAudioInfo &audioInfoEx() {

            return header.audioInfo();

        }


        AudioInfo audioInfo() override {

            return header.audioInfo();

        }

        AudioInfo audioInfo() override  {…}


        virtual size_t write(const uint8_t *data, size_t len) {

            TRACED();

            if (active) {

                if (isFirst) {

                    // we expect at least the full header

                    int written = header.write((uint8_t*)data, len);

                    if (written == IMA_ERR_INVALID_CONTAINER || written == IMA_ERR_INVALID_CHUNK) {

                        isValid = false;

                        isFirst = false;

                        LOGE("File is not valid");

                        return len;

                    }


                    if (!header.isDataComplete()) {

                        return len;

                    }


                    size_t len_open = len - written;

                    uint8_t *sound_ptr = (uint8_t *) data + written;

                    isFirst = false;

                    isValid = header.audioInfo().is_valid;


                    LOGI("WAV sample_rate: %d", header.audioInfo().sample_rate);

                    LOGI("WAV data_length: %u", (unsigned) header.audioInfo().data_length);

                    LOGI("WAV is_valid: %s", header.audioInfo().is_valid ? "true" :  "false");


                    isValid = header.audioInfo().is_valid;

                    if (isValid) {

                        if (input_buffer != nullptr) delete[] input_buffer;

                        if (output_buffer != nullptr) delete[] output_buffer;

                        bytes_per_encoded_block = header.audioInfo().block_align;

                        bytes_per_decoded_block = header.audioInfo().frames_per_block * header.audioInfo().channels * 2;

                        samples_per_decoded_block = bytes_per_decoded_block >> 1;

                        input_buffer = new uint8_t[bytes_per_encoded_block];

                        output_buffer = new int16_t[samples_per_decoded_block];

                        // update sampling rate if the target supports it

                        AudioInfo bi;

                        bi.sample_rate = header.audioInfo().sample_rate;

                        bi.channels = header.audioInfo().channels;

                        bi.bits_per_sample = 16;

                        remaining_bytes = header.audioInfo().data_length;

                        notifyAudioChange(bi);

                        // write prm data from first record

                        LOGI("WavIMADecoder writing first sound data");

                        processInput(sound_ptr, len_open);

                    }

                } else if (isValid) {

                    processInput((uint8_t*)data, len);

                }

            }

            return len;

        }


        int readStream(Stream &in) {

            TRACED();

            uint8_t buffer[READ_BUFFER_SIZE];

            int len = in.readBytes(buffer, READ_BUFFER_SIZE);

            return write(buffer, len);

        }

        int readStream(Stream &in) {…}


        virtual operator bool() {

            return active;

        }


    protected:

        WavIMAHeader header;

        Print *out;

        bool isFirst = true;

        bool isValid = true;

        bool active;

        uint8_t *input_buffer = nullptr;

        int32_t input_pos = 0;

        size_t remaining_bytes = 0;

        size_t bytes_per_encoded_block = 0;

        int16_t *output_buffer = nullptr;

        size_t bytes_per_decoded_block = 0;

        size_t samples_per_decoded_block = 0;

        IMAState ima_states[2];


        int16_t decodeSample(uint8_t sample, int channel = 0) {

            int step_index = ima_states[channel].step_index;

            int32_t step = ima_step_table[step_index];

            step_index += ima_index_table[sample];

            if (step_index < 0) step_index = 0;

            else if (step_index > 88) step_index = 88;

            ima_states[channel].step_index = step_index;

            int32_t predictor = ima_states[channel].predictor;

            uint8_t sign = sample & 8;

            uint8_t delta = sample & 7;

            int32_t diff = step >> 3;

            if (delta & 4) diff += step;

            if (delta & 2) diff += (step >> 1);

            if (delta & 1) diff += (step >> 2);

            if (sign) predictor -= diff;

            else predictor += diff;

            if (predictor < -32768) predictor = -32768;

            else if (predictor > 32767) predictor = 32767;

            ima_states[channel].predictor = predictor;

            return (int16_t)predictor;

        }


        void decodeBlock(int channels) {

            if (channels == 0 || channels > 2) return;

            input_pos = 4;

            int output_pos = 1;

            ima_states[0].predictor = (int16_t)((input_buffer[1] << 8) + input_buffer[0]);

            ima_states[0].step_index = input_buffer[2];

            output_buffer[0] = ima_states[0].predictor;

            if (channels == 2) {

                ima_states[1].predictor = (int16_t)(input_buffer[5] << 8) + input_buffer[4];

                ima_states[1].step_index = input_buffer[6];

                output_buffer[1] = ima_states[1].predictor;

                input_pos = 8;

                output_pos = 2;

            }

            for (int i=0; i<samples_per_decoded_block-channels; i++) {

                uint8_t sample = (i & 1) ? input_buffer[input_pos++] >> 4 : input_buffer[input_pos] & 15;

                if (channels == 1) output_buffer[output_pos++] = decodeSample(sample);

                else {

                    output_buffer[output_pos] = decodeSample(sample, (i >> 3) & 1);

                    output_pos += 2;

                    if ((i & 15) == 7) output_pos -= 15;

                    else if ((i & 15) == 15) output_pos--;

                }

            }

        }


        void processInput(const uint8_t* data, size_t size) {

            int max_size = min(size, remaining_bytes);

            for (int i=0; i<max_size; i++) {

                input_buffer[input_pos++] = data[i];

                if (input_pos == bytes_per_encoded_block) {

                    decodeBlock(header.audioInfo().channels);

                    input_pos = 0;

                    out->write((uint8_t*)output_buffer, bytes_per_decoded_block);

                }

            }

            remaining_bytes -= max_size;

            if (remaining_bytes == 0) active = false;

        }

};

class WavIMADecoder : public AudioDecoder {…};


}

audio_tools::AudioDecoder
Decoding of encoded audio into PCM data.
Definition AudioCodecsBase.h:18

audio_tools::AudioInfoSource::addNotifyAudioChange
virtual void addNotifyAudioChange(AudioInfoSupport &bi)
Adds target to be notified about audio changes.
Definition AudioTypes.h:151

audio_tools::AudioInfoSupport
Supports changes to the sampling rate, bits and channels.
Definition AudioTypes.h:133

audio_tools::Print
Definition NoArduino.h:62

audio_tools::Stream
Definition NoArduino.h:142

audio_tools::WavIMADecoder
Obsolete: WavIMADecoder - based on WAVDecoder - We parse the header data as we receive it and send th...
Definition CodecWavIMA.h:286

audio_tools::WavIMADecoder::WavIMADecoder
WavIMADecoder()
Construct a new WavIMADecoder object.
Definition CodecWavIMA.h:292

audio_tools::WavIMADecoder::WavIMADecoder
WavIMADecoder(Print &out_stream, AudioInfoSupport &bi)
Construct a new WavIMADecoder object.
Definition CodecWavIMA.h:314

audio_tools::WavIMADecoder::setOutput
void setOutput(Print &out_stream)
Defines the output Stream.
Definition CodecWavIMA.h:326

audio_tools::WavIMADecoder::readStream
int readStream(Stream &in)
Alternative API which provides the data from an input stream.
Definition CodecWavIMA.h:413

audio_tools::WavIMADecoder::audioInfo
AudioInfo audioInfo() override
provides the actual input AudioInfo
Definition CodecWavIMA.h:355

audio_tools::WavIMADecoder::WavIMADecoder
WavIMADecoder(Print &out_stream, bool active=true)
Construct a new WavIMADecoder object.
Definition CodecWavIMA.h:301

audio_tools::WavIMAHeader
Definition CodecWavIMA.h:75

audio_tools::WavIMAHeader::isDataComplete
bool isDataComplete()
Returns true if the header is complete (data chunk has been found)
Definition CodecWavIMA.h:211

audio_tools
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition AudioCodecsBase.h:10

audio_tools::chunk_result
chunk_result
Parser for Wav header data adjusted for IMA ADPCM format - partially based on CodecWAV....
Definition CodecWavIMA.h:68

audio_tools::AudioInfo
Basic Audio information which drives e.g. I2S.
Definition AudioTypes.h:53

audio_tools::AudioInfo::sample_rate
sample_rate_t sample_rate
Sample Rate: e.g 44100.
Definition AudioTypes.h:55

audio_tools::AudioInfo::channels
uint16_t channels
Number of channels: 2=stereo, 1=mono.
Definition AudioTypes.h:57

audio_tools::AudioInfo::bits_per_sample
uint8_t bits_per_sample
Number of bits per sample (int16_t = 16 bits)
Definition AudioTypes.h:59

audio_tools::IMAState
Definition CodecWavIMA.h:53

audio_tools::WavIMAAudioInfo
Sound information which is available in the WAV header - adjusted for IMA ADPCM.
Definition CodecWavIMA.h:35