arduino-audio-tools
All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends Macros Modules Pages
CodecDSF.h
Go to the documentation of this file.
21#pragma once
22// #pragma GCC optimize("Ofast")
23#pragma GCC optimize("O3")
24
25#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
26#include "AudioTools/CoreAudio/AudioFilter/Filter.h"
27#include "AudioTools/CoreAudio/Buffers.h"
28
/// Buffer size (in bytes) for DSD data processing - must accommodate the
/// decimation step. Parenthesized so that the macro expands safely inside
/// larger expressions (e.g. `x / DSD_BUFFER_SIZE`).
#define DSD_BUFFER_SIZE (1024 * 2)
37
38namespace audio_tools {
39
49struct DSFMetadata : public AudioInfo {
50 DSFMetadata() = default;
51 DSFMetadata(int rate) { sample_rate = rate; }
52 uint32_t dsd_sample_rate =
53 0;
54 uint64_t dsd_data_bytes = 0;
55 uint8_t dsd_bits = 1;
56 uint64_t pcm_frames = 0;
57 float duration_sec = 0;
58 uint32_t dsd_buffer_size =
60 float filter_q = 0.5f; //1.41f;
61 float filter_cutoff = 0.4f;
62 int output_buffer_size = 1024;
63};
64
/// "DSD " chunk: the first chunk of a DSF file (28 bytes, packed).
struct __attribute__((packed)) DSDPrefix {
  char id[4];               // "DSD "
  uint64_t chunkSize;       // 28
  uint64_t fileSize;        // total file size
  uint64_t metadataOffset;  // offset to "ID3 " chunk (0 if none)
};
// The struct mirrors raw file bytes: fail the build if the layout ever drifts.
static_assert(sizeof(DSDPrefix) == 28,
              "DSDPrefix must match the 28 byte DSD chunk layout");
80
/// "fmt " chunk of a DSF file (52 bytes, packed). parseFMT() overlays this
/// struct on the received bytes.
struct __attribute__((packed)) DSFFormat {
  char id[4];                    // "fmt "
  uint64_t chunkSize;            // 52
  uint32_t formatVersion;        // 1
  uint32_t formatID;             // 0
  uint32_t channelType;          // e.g., 2 for stereo
  uint32_t channelNum;           // number of channels
  uint32_t samplingFrequency;    // e.g., 2822400
  uint32_t bitsPerSample;        // 1
  uint64_t sampleCount;          // total samples per channel
  uint32_t blockSizePerChannel;  // e.g., 4096
  uint32_t reserved;             // 0
};
// The struct mirrors raw file bytes: fail the build if the layout ever drifts.
static_assert(sizeof(DSFFormat) == 52,
              "DSFFormat must match the 52 byte fmt chunk layout");
95
/// Header of the "data" chunk of a DSF file (12 bytes, packed). The raw DSD
/// data follows directly after this header.
struct __attribute__((packed)) DSFDataHeader {
  char id[4];          // "data"
  uint64_t chunkSize;  // size of DSD data
  // followed by: uint8_t rawData[chunkSize];
};
// The struct mirrors raw file bytes: fail the build if the layout ever drifts.
static_assert(sizeof(DSFDataHeader) == 12,
              "DSFDataHeader must match the 12 byte data chunk header");
102
125class DSFDecoder : public AudioDecoder {
126 public:
/// Creates a decoder with default metadata.
127 DSFDecoder() = default;
/// Creates a decoder and hands the given metadata to setMetaData().
128 DSFDecoder(DSFMetadata metaData) { setMetaData(metaData); };
129
/// Returns the AudioInfo part of the decoder metadata.
130 AudioInfo audioInfo() override { return meta; }
131
/// Copies the given audio info into the metadata. Once the DSF header is
/// available, the PCM buffer is resized to getOutputBufferSize() and the
/// accumulators are resized to one entry per channel.
134 void setAudioInfo(AudioInfo from) override {
135 TRACED();
// NOTE(review): original line 136 is not present in this listing.
137 meta.copyFrom(from);
138 if (isHeaderAvailable()){
139 // Ensure PCM buffer is allocated based on the new audio info
140 int buffer_size = getOutputBufferSize();
141 pcmBuffer.resize(buffer_size);
142 channelAccum.resize(meta.channels);
// NOTE(review): original lines 143, 145 and 146 are not present in this
// listing; further setup steps may have been lost here.
144
147 }
148 }
149
/// Resets the header parsing state, the file position, the decimation step
/// (to 64) and the maximum value, then marks the decoder as active.
/// @return always true
159 bool begin() {
160 TRACED();
// NOTE(review): original lines 161-162 are not present in this listing.
163 headerParsed = false;
164 headerSize = 0;
165 dataSize = 0;
166 filePos = 0;
167 decimationStep = 64;
168 max_value = 0;
169
170 // update decimation step & filter parameters
171 isActive = true;
172
173 return true;
174 }
175
/// Marks the decoder as inactive.
176 void end() override { isActive = false; }
177
/// Returns a copy of the DSF metadata.
185 const DSFMetadata getMetadata() { return meta; }
186
/// Replaces the decoder metadata with the given value.
187 void setMetaData(DSFMetadata metaData) {
188 meta = metaData;
// NOTE(review): original line 189 is not present in this listing.
190 }
191
200
/// True while the decoder is active, i.e. after begin() and before end().
201 operator bool() { return isActive; }
202
217 size_t write(const uint8_t* data, size_t len) {
218 LOGD("write: %u", (unsigned)len);
219 size_t i = 0;
220
221 // Phase 1: Parse DSF header to extract format information
222 i += processHeader(data, len, i);
223
224 // Phase 2: Process audio data (buffer DSD + convert to PCM)
225 if (headerParsed && i < len) {
226 i += processDSDData(data, len, i);
227 }
228
229 return len; // Always report full consumption for streaming compatibility
230 }
231
232 protected:
// Header parsing state. All members carry an initializer so that the decoder
// is in a defined state even before begin() has been called.
size_t headerSize = 0;      // Current size of accumulated header data
bool headerParsed = false;  // Flag indicating if header parsing is complete
bool isActive = false;      // Flag indicating if decoder is active and ready
uint64_t dataSize = 0;      // Size of audio data section in bytes
size_t filePos = 0;         // Current position in DSF file

// Processing buffers and state
// NOTE(review): the declarations of pcmBuffer, channelAccum, channelFilters,
// dsdBuffer and channelIntegrator (original lines 241-249) are not present in
// this listing.
uint32_t decimationStep = 64;  // Decimation factor; same default as begin()

// Metadata
// NOTE(review): the declaration of meta (original line 253) is not present in
// this listing.
float max_value = 0.0f;
255
258 int frame_size = meta.bits_per_sample / 8 * meta.channels;
259 if (meta.bits_per_sample == 24) frame_size = 4 * meta.channels;
260 int buffer_size = frame_size;
261 if (meta.output_buffer_size > buffer_size)
262 buffer_size = meta.output_buffer_size;
263 return buffer_size;
264 }
265
277 size_t processHeader(const uint8_t* data, size_t len, size_t startPos) {
278 if (headerParsed) return 0;
279 LOGI("processHeader: %u (%u)", (unsigned)len, (unsigned)startPos);
280
281 // Check for DSD header magic
282 if (memcmp(data, "DSD ", 4) != 0) {
283 LOGE("Invalid DSF header magic");
284 return 0;
285 }
286
287 int dataPos = findTag("data", data, len);
288 int fmtPos = findTag("fmt ", data, len);
289 if (dataPos < 0 || fmtPos < 0) {
290 LOGE("DSF header not found in data (fmt: %d, data: %d)", fmtPos, dataPos);
291 return 0; // No valid header found
292 }
293 // parse the data
294 parseFMT(data + fmtPos, len - fmtPos);
295 parseData(data + dataPos, len - dataPos);
296 headerParsed = true;
297
298 // update audio info and initialize filters
300
301 return dataPos + sizeof(DSFDataHeader);
302 }
303
314 size_t processDSDData(const uint8_t* data, size_t len, size_t startPos) {
315 LOGD("processDSDData: %u (%u)", (unsigned)len, (unsigned)startPos);
316 size_t bytesProcessed = 0;
317
318 // Buffer as much DSD data as possible
319 bytesProcessed += bufferDSDData(data, len, startPos);
320
321 // Convert buffered DSD data to PCM output
323
324 return bytesProcessed;
325 }
326
337 size_t bufferDSDData(const uint8_t* data, size_t len, size_t startPos) {
338 int write_len = len - startPos;
339 if (write_len > dsdBuffer.availableForWrite()) {
340 write_len = dsdBuffer.availableForWrite();
341 }
342 dsdBuffer.writeArray(data + startPos, write_len);
343 filePos += write_len;
344
345 return write_len;
346 }
347
// Body of convertDSDToPCM(): converts the buffered DSD data to PCM samples and
// writes them to the output whenever the PCM buffer is full.
// NOTE(review): the signature line (original line 366) and the original lines
// 399, 420 and 437 are not present in this listing.
367 while (hasEnoughData()) {
368 // Initialize accumulators
369 for (int ch = 0; ch < meta.channels; ch++) {
370 channelAccum[ch] = 0.0f;
371 }
372 // Initialize integrator states
373 for (int ch = 0; ch < meta.channels; ch++) {
374 channelIntegrator[ch] = 0.0f;
375 }
376
377 // Accumulate DSD samples over decimation period
378 // DSF uses byte interleaving: bytes alternate between channels
379 int bytesPerDecimationStep = decimationStep / 8;
380 int samplesProcessed = 0;
381
// Read one byte per channel for each byte of the decimation step; stop early
// when the DSD buffer runs empty.
382 for (int i = 0; i < bytesPerDecimationStep && !dsdBuffer.isEmpty(); i++) {
383 for (int ch = 0; ch < meta.channels && !dsdBuffer.isEmpty(); ch++) {
384 uint8_t dsdByte;
385 if (dsdBuffer.read(dsdByte)) {
386 // Each byte contains 8 DSD samples for the current channel
387 // Use integrator-based approach for better DSD conversion
388 for (int bit = 0; bit < 8; bit++) {
389 int channelBit = (dsdByte >> (7 - bit)) & 1; // MSB first in DSF
390
391 // Delta-sigma integration: accumulate the difference
392 channelIntegrator[ch] += channelBit ? 1.0f : -1.0f;
393
394 // Apply decay to prevent DC buildup
395 channelIntegrator[ch] *= 0.9999f;
396 }
397
398 // Add integrated value to channel accumulator
// NOTE(review): the statement for the comment above (original line 399) is
// not present in this listing.
400 samplesProcessed += 8;
401 }
402 }
403 }
404
// Integer division: the quotient is converted to float afterwards.
405 float samplesPerChannel = samplesProcessed / meta.channels;
406
407 if (samplesPerChannel > 0) {
408 for (int ch = 0; ch < meta.channels; ch++) {
409 // Normalize by sample count and apply scaling factor
410 channelAccum[ch] = channelAccum[ch] / samplesPerChannel * 0.8f;
411 if (meta.filter_cutoff > 0.0f &&
412 meta.filter_q > 0.0f) { // Only apply filter if configured
413 // Apply low-pass filter to remove high-frequency noise
414 channelAccum[ch] = channelFilters[ch].process(channelAccum[ch]);
415 }
416 //Serial.print(channelAccum[ch]);
417 //Serial.print(" ");
418
419 // Convert to PCM sample and store in buffer
// NOTE(review): the statement for the comment above (original line 420) is
// not present in this listing.
421 }
422 }
423
424 //Serial.println();
425
426 // Output the PCM samples for all channels
427 if (pcmBuffer.isFull()) {
428 size_t frameSize = pcmBuffer.available();
429 size_t written =
430 getOutput()->write((uint8_t*)pcmBuffer.data(), frameSize);
431 if (written != frameSize) {
432 LOGE(
433 "Failed to write PCM samples: expected %zu bytes, wrote %zu "
434 "bytes",
435 frameSize, written);
436 }
// NOTE(review): original line 437 is not present in this listing.
438 }
439 }
440 }
441
/// Clips audio values to the valid range.
///
/// @param value  sample value
/// @return value limited to [-1.0, 1.0]; NaN is mapped to 0.0 (silence) so
///         that it can never reach a float to integer conversion
float clip(float value) {
  if (value != value) return 0.0f;  // only NaN is unequal to itself
  if (value > 1.0f) return 1.0f;
  if (value < -1.0f) return -1.0f;
  return value;
}
455
// Body of setupTargetPCMRate(): starts the low-pass filter of each channel.
// NOTE(review): the signature line (original line 464) and the original line
// 471 are not present in this listing.
465 TRACEI();
466
467 // Initialize filters for the correct number of channels
468 if (meta.sample_rate > 0 && meta.channels > 0) {
469 float cutoffFreq =
470 meta.sample_rate * meta.filter_cutoff; // filter_cutoff is a fraction of the PCM sample rate
472 for (int i = 0; i < meta.channels; i++) {
473 channelFilters[i].begin(cutoffFreq, meta.sample_rate, meta.filter_q);
474 }
475 }
476 }
477
// Body of setupDecimationStep(): validates the sample rates and keeps the
// decimation step in the range 64..512 as a multiple of 8.
// NOTE(review): the signature line (original line 486) and the original line
// 494 are not present in this listing; the statement that derives
// decimationStep from the sample rates is presumably the missing one.
487 TRACEI();
488 if (meta.sample_rate == 0 || meta.dsd_sample_rate == 0) {
489 LOGE("Invalid sample rates: DSD=%u, PCM=%u",
490 (unsigned)meta.dsd_sample_rate, (unsigned)meta.sample_rate);
491 return;
492 }
493
// Clamp to the supported range
495 if (decimationStep < 64) {
496 LOGW("Decimation step %u too low, setting to 64",
497 (unsigned)decimationStep);
498 decimationStep = 64;
499 }
500 if (decimationStep > 512) {
501 LOGW("Decimation step %u too high, setting to 512",
502 (unsigned)decimationStep);
503 decimationStep = 512;
504 }
505
506 // Ensure decimation step is multiple of 8 for clean byte processing
507 decimationStep = (decimationStep / 8) * 8;
508 if (decimationStep < 64) decimationStep = 64;
509
510 LOGI("Decimation step set to %u for DSD rate %u and target PCM rate %u",
511 (unsigned)decimationStep, (unsigned)meta.dsd_sample_rate,
512 (unsigned)meta.sample_rate);
513 }
514
525 // DSF uses byte interleaving: each decimation step needs enough bytes
526 // to cover all channels. Each byte contains 8 DSD samples for one
527 // channel.
528 int bytesPerDecimationStep = (decimationStep / 8) * meta.channels;
529 if (bytesPerDecimationStep < meta.channels)
530 bytesPerDecimationStep = meta.channels;
531
532 return dsdBuffer.available() >= bytesPerDecimationStep;
533 }
534
540 void writePCMSample(float filteredValue) {
541 switch (meta.bits_per_sample) {
542 case 8: {
543 int8_t buffer8 = static_cast<int8_t>(filteredValue * 127.0f);
544 pcmBuffer.write(buffer8);
545 break;
546 }
547 case 16: {
548 int16_t buffer16 = static_cast<int16_t>(filteredValue * 32767.0f);
549 pcmBuffer.writeArray((uint8_t*)&buffer16, sizeof(int16_t));
550 break;
551 }
552 case 24: {
553 int24_t buffer24 =
554 static_cast<int24_t>(filteredValue * 8388607.0f); // 2^23 - 1
555 pcmBuffer.writeArray((uint8_t*)&buffer24, sizeof(int24_t));
556 break;
557 }
558 case 32: {
559 int32_t buffer32 =
560 static_cast<int32_t>(filteredValue * 2147483647.0f); // 2^31 -
561 pcmBuffer.writeArray((uint8_t*)&buffer32, sizeof(int32_t));
562 break;
563 }
564 default:
565 LOGE("Unsupported bits per sample: %d", meta.bits_per_sample);
566 break;
567 }
568 }
569
/// Find a specific tag within binary data.
///
/// @param tag   zero terminated tag to search for (e.g. "fmt ")
/// @param data  bytes to search in
/// @param len   number of bytes in data
/// @return index of the first occurrence of the tag, or -1 when the tag is
///         not contained in data (including data that is shorter than the tag)
int findTag(const char* tag, const uint8_t* data, size_t len) {
  if (tag == nullptr || data == nullptr || len == 0) return -1;
  const size_t taglen = strlen(tag);
  // Guard first: `len - taglen` would wrap around for short input
  if (len < taglen) return -1;

  // `<=` so that a tag located at the very end of the data is found as well
  for (size_t j = 0; j + taglen <= len; j++) {
    if (memcmp(tag, data + j, taglen) == 0) {
      return static_cast<int>(j);  // Found the tag at position j
    }
  }
  return -1;
}
590
601 bool parseFMT(const uint8_t* data, size_t len) {
602 TRACEI();
603 if (len < sizeof(DSFFormat)) {
604 LOGE("FMT section too short to parse DSF format header");
605 return false; // Not enough data to parse
606 }
607 DSFFormat* fmt = (DSFFormat*)data;
608 meta.channels = fmt->channelNum;
609 // Fallback to channel type if channels is 0
610 if (meta.channels == 0) meta.channels = fmt->channelType;
611 meta.dsd_sample_rate = fmt->samplingFrequency;
612
613 // Validate channel count
614 if (meta.channels == 0 || meta.channels > 8) {
615 LOGE("Invalid channel count: %u (must be 1-8)", (unsigned)meta.channels);
616 return false;
617 }
618
619 LOGI("channels: %u, DSD sample rate: %u", (unsigned)meta.channels,
620 (unsigned)meta.dsd_sample_rate);
621 return true;
622 }
623
634 bool parseData(const uint8_t* data, size_t len) {
635 TRACEI();
636 if (len < sizeof(DSFDataHeader)) {
637 LOGE("Data section too short to parse DSF data header");
638 return false; // Not enough data to parse
639 }
640 DSFDataHeader* header = (DSFDataHeader*)data;
641 dataSize = header->chunkSize;
643
644 uint64_t totalBits = dataSize * 8;
645 uint64_t totalDSDSamples = totalBits / meta.channels;
646 uint64_t totalPCMFrames =
647 totalDSDSamples / (meta.dsd_sample_rate / meta.sample_rate);
648 meta.pcm_frames = totalPCMFrames;
649 meta.duration_sec = (float)totalPCMFrames / meta.sample_rate;
650 return true;
651 }
652};
653
654} // namespace audio_tools
#define DSD_BUFFER_SIZE
Buffer size for DSD data processing - must accommodate decimation step.
Definition CodecDSF.h:36
Decoding of encoded audio into PCM data.
Definition AudioCodecsBase.h:18
void setAudioInfo(AudioInfo from) override
for most decoders this is not needed
Definition AudioCodecsBase.h:28
virtual int writeArray(const T data[], int len)
Fills the buffer data.
Definition Buffers.h:55
DSF (DSD Stream File) format decoder.
Definition CodecDSF.h:125
bool isHeaderAvailable()
Check if decoder is ready.
Definition CodecDSF.h:199
bool headerParsed
Flag indicating if header parsing is complete.
Definition CodecDSF.h:235
DSFMetadata meta
Extracted DSF file metadata.
Definition CodecDSF.h:253
void convertDSDToPCM()
Convert buffered DSD data to PCM samples and output them.
Definition CodecDSF.h:366
int getOutputBufferSize()
The buffer size is defined in the metadata: it must be at least 1 frame.
Definition CodecDSF.h:257
size_t headerSize
Current size of accumulated header data.
Definition CodecDSF.h:234
Vector< float > channelIntegrator
Definition CodecDSF.h:249
void setAudioInfo(AudioInfo from) override
Definition CodecDSF.h:134
bool begin()
Initialize the decoder.
Definition CodecDSF.h:159
void setupTargetPCMRate()
Set up low-pass filters for all channels.
Definition CodecDSF.h:464
size_t processDSDData(const uint8_t *data, size_t len, size_t startPos)
Process DSD audio data: buffer it and convert to PCM when possible.
Definition CodecDSF.h:314
const DSFMetadata getMetadata()
Get DSF file metadata.
Definition CodecDSF.h:185
Vector< float > channelAccum
Definition CodecDSF.h:243
float clip(float value)
Clips audio values to valid range.
Definition CodecDSF.h:450
void setupDecimationStep()
Calculate optimal decimation step for DSD to PCM conversion.
Definition CodecDSF.h:486
SingleBuffer< uint8_t > pcmBuffer
Definition CodecDSF.h:241
bool isActive
Flag indicating if decoder is active and ready.
Definition CodecDSF.h:236
bool parseData(const uint8_t *data, size_t len)
Parse DSF data chunk to extract audio data information.
Definition CodecDSF.h:634
uint64_t dataSize
Size of audio data section in bytes.
Definition CodecDSF.h:237
int findTag(const char *tag, const uint8_t *data, size_t len)
Find a specific tag within binary data.
Definition CodecDSF.h:580
size_t filePos
Current position in DSF file.
Definition CodecDSF.h:238
uint32_t decimationStep
Decimation factor for DSD to PCM conversion.
Definition CodecDSF.h:248
RingBuffer< uint8_t > dsdBuffer
Ring buffer for DSD data.
Definition CodecDSF.h:247
Vector< LowPassFilter< float > > channelFilters
Anti-aliasing filters for each channel.
Definition CodecDSF.h:246
size_t bufferDSDData(const uint8_t *data, size_t len, size_t startPos)
Buffer incoming DSD data into ring buffer.
Definition CodecDSF.h:337
size_t processHeader(const uint8_t *data, size_t len, size_t startPos)
Process header data until header is complete or data is exhausted.
Definition CodecDSF.h:277
void writePCMSample(float filteredValue)
Convert filtered DSD value to PCM sample in the buffer.
Definition CodecDSF.h:540
bool parseFMT(const uint8_t *data, size_t len)
Parse DSF format chunk to extract audio parameters.
Definition CodecDSF.h:601
AudioInfo audioInfo() override
provides the actual input AudioInfo
Definition CodecDSF.h:130
bool hasEnoughData()
Check if sufficient DSD data is available for conversion.
Definition CodecDSF.h:524
size_t write(const uint8_t *data, size_t len)
Main entry point for processing incoming DSF data.
Definition CodecDSF.h:217
Implements a typed Ringbuffer.
Definition Buffers.h:327
bool read(T &result) override
reads a single value
Definition Buffers.h:334
virtual int availableForWrite() override
provides the number of entries that are available to write
Definition Buffers.h:399
virtual void reset() override
clears the buffer
Definition Buffers.h:389
virtual bool resize(int len)
Resizes the buffer if supported: returns false if not supported.
Definition Buffers.h:404
virtual int available() override
provides the number of entries that are available to read
Definition Buffers.h:396
A simple Buffer implementation which just uses a (dynamically sized) array.
Definition Buffers.h:172
bool write(T sample) override
Adds an entry to the buffer.
Definition Buffers.h:202
int available() override
provides the number of entries that are available to read
Definition Buffers.h:229
bool isFull() override
checks if the buffer is full
Definition Buffers.h:236
bool resize(int size)
Resizes the buffer if supported: returns false if not supported.
Definition Buffers.h:292
int writeArray(const T data[], int len) override
Fills the buffer data.
Definition Buffers.h:197
T * data()
Provides address of actual data.
Definition Buffers.h:271
void reset() override
clears the buffer
Definition Buffers.h:273
Vector implementation which provides the most important methods as defined by std::vector....
Definition Vector.h:21
24bit integer which is used for I2S sound processing. The values are represented as int32_t,...
Definition Int24_4bytes_t.h:16
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition AudioCodecsBase.h:10
Basic Audio information which drives e.g. I2S.
Definition AudioTypes.h:53
void copyFrom(AudioInfo info)
Same as set.
Definition AudioTypes.h:103
sample_rate_t sample_rate
Sample Rate: e.g 44100.
Definition AudioTypes.h:55
uint16_t channels
Number of channels: 2=stereo, 1=mono.
Definition AudioTypes.h:57
uint8_t bits_per_sample
Number of bits per sample (int16_t = 16 bits)
Definition AudioTypes.h:59
Metadata structure for DSF (DSD Stream File) format.
Definition CodecDSF.h:49
uint64_t dsd_data_bytes
Size of DSD bitstream data in bytes.
Definition CodecDSF.h:54
uint8_t dsd_bits
Bit size: always 1.
Definition CodecDSF.h:55
uint32_t dsd_sample_rate
DSD sample rate (e.g. 2822400 Hz for DSD64)
Definition CodecDSF.h:52
uint32_t dsd_buffer_size
Internal buffer size for DSD processing.
Definition CodecDSF.h:58
uint64_t pcm_frames
Estimated number of PCM frames after conversion.
Definition CodecDSF.h:56
float filter_cutoff
Cutoff frequency as a fraction of the PCM sample rate (the filter cutoff is computed as sample_rate * filter_cutoff).
Definition CodecDSF.h:61
float duration_sec
Approximate audio duration in seconds.
Definition CodecDSF.h:57