arduino-audio-tools
AudioESP32ULP.h
Go to the documentation of this file.
1 
10 #pragma once
11 
12 #ifndef ESP32
13 #error Only the ESP32 supports ULP audio output
14 #endif
15 
16 #include <driver/dac.h>
17 #include <driver/rtc_io.h>
18 #include <esp32/ulp.h>
19 #include <math.h>
20 #include <soc/rtc.h>
21 
22 namespace audio_tools {
23 
/// Selects one of the two DACs for mono output. The numeric values are also
/// used as bits of the active-DAC mask (1 = DAC1, 2 = DAC2, 3 = both).
enum UlpDac {
  ULP_DAC1 = 1,
  ULP_DAC2 = 2
};
25 
35 class AudioESP32ULP : public AudioOutput {
36 public:
37  AudioInfo defaultConfig() {
38  AudioInfo cfg(44100, 2, 16);
39  return cfg;
40  }
41 
43  void setMonoDAC(UlpDac dac){
44  selected_mono_dac = dac;
45  }
46 
48  void setMinWriteBytes(int bytes){
49  min_write_bytes = bytes;
50  }
51 
53  bool begin(AudioInfo info) {
54  TRACEI();
55  cfg = info;
56  stereoOutput = info.channels == 2;
57  activeDACs = stereoOutput ? 3 : selected_mono_dac;
58  hertz = cfg.sample_rate;
59 
60  if (info.bits_per_sample != 16) {
61  LOGE("Unsupported bits_per_sample: %d", info.bits_per_sample);
62  return false;
63  }
64  return setup();
65  }
66 
67  size_t write(const uint8_t *data, size_t len) {
68  TRACED();
69  int16_t *data_16 = (int16_t *)data;
70  size_t result = 0;
71  int16_t stereo[2];
72  int frameSize = cfg.channels * sizeof(int16_t);
73  int frames = len / frameSize;
74  for (int j = 0; j < frames; j++) {
75  int pos = j * cfg.channels;
76  stereo[0] = data_16[pos];
77  stereo[1] = stereoOutput ? data_16[pos + 1] : data_16[pos];
78  // blocking write
79  while (!writeFrame(stereo)) {
80  delay(20);
81  }
82  result += frameSize;
83  }
84  return result;
85  }
86 
87  int availableForWrite() {
88  int result = totalSampleWords-lastFilledWord;
89  return result < min_write_bytes ? 0 : result;
90  }
91 
92  void end() {
93  TRACEI();
94  const ulp_insn_t stopulp[] = {// stop the timer
95  I_END(),
96  // end the program
97  I_HALT()};
98 
99  size_t load_addr = 0;
100  size_t size = sizeof(stopulp) / sizeof(ulp_insn_t);
101  ulp_process_macros_and_load(load_addr, stopulp, &size);
102 
103  // start
104  ulp_run(0);
105 
106  if (activeDACs & 1) {
107  dac_output_voltage(DAC_CHANNEL_1, 128);
108  }
109  if (activeDACs & 2) {
110  dac_output_voltage(DAC_CHANNEL_2, 128);
111  }
112  }
113 
114 
115 protected:
116  int lastFilledWord = 0;
117  int hertz;
118  int min_write_bytes = 128;
119  UlpDac selected_mono_dac = ULP_DAC1;
120  uint8_t bufferedOddSample = 128;
121  bool waitingOddSample = true; // must be set to false for mono output
122  int activeDACs = 3; // 1:DAC1; 2:DAC2; 3:both;
123  bool stereoOutput = true;
124  const int opcodeCount = 20;
125  const uint32_t dacTableStart1 = 2048 - 512;
126  const uint32_t dacTableStart2 = dacTableStart1 - 512;
127  uint32_t totalSampleWords =
128  2048 - 512 - 512 - (opcodeCount + 1); // add 512 for mono
129  const int totalSamples = totalSampleWords * 2;
130  const uint32_t indexAddress = opcodeCount;
131  const uint32_t bufferStart = indexAddress + 1;
132 
133  bool setup() {
134  TRACED();
135  if (!stereoOutput) {
136  waitingOddSample = false;
137  // totalSampleWords += 512;
138  // dacTableStart2 = dacTableStart1;
139  }
140 
141  // calculate the actual ULP clock
142  unsigned long rtc_8md256_period = rtc_clk_cal(RTC_CAL_8MD256, 1000);
143  unsigned long rtc_fast_freq_hz =
144  1000000ULL * (1 << RTC_CLK_CAL_FRACT) * 256 / rtc_8md256_period;
145 
146  // initialize DACs
147  if (activeDACs & 1) {
148  dac_output_enable(DAC_CHANNEL_1);
149  dac_output_voltage(DAC_CHANNEL_1, 128);
150  }
151  if (activeDACs & 2) {
152  dac_output_enable(DAC_CHANNEL_2);
153  dac_output_voltage(DAC_CHANNEL_2, 128);
154  }
155 
156  int retAddress1 = 9;
157  int retAddress2 = 14;
158 
159  int loopCycles = 134;
160  int loopHalfCycles1 = 90;
161  int loopHalfCycles2 = 44;
162 
163  LOGI("Real RTC clock: %d", rtc_fast_freq_hz);
164 
165  uint32_t dt = (rtc_fast_freq_hz / hertz) - loopCycles;
166  uint32_t dt2 = 0;
167  if (!stereoOutput) {
168  dt = (rtc_fast_freq_hz / hertz) - loopHalfCycles1;
169  dt2 = (rtc_fast_freq_hz / hertz) - loopHalfCycles2;
170  }
171 
172  LOGI("dt: %d", dt);
173  LOGI("dt2: %d", dt2);
174 
175  const ulp_insn_t stereo[] = {
176  // reset offset register
177  I_MOVI(R3, 0),
178  // delay to get the right sampling rate
179  I_DELAY(dt), // 6 + dt
180  // reset sample index
181  I_MOVI(R0, 0), // 6
182  // write the index back to memory for the main cpu
183  I_ST(R0, R3, indexAddress), // 8
184  // load the samples
185  I_LD(R1, R0, bufferStart), // 8
186  // mask the lower 8 bits
187  I_ANDI(R2, R1, 0x00ff), // 6
188  // multiply by 2
189  I_LSHI(R2, R2, 1), // 6
190  // add start position
191  I_ADDI(R2, R2, dacTableStart1), // 6
192  // jump to the dac opcode
193  I_BXR(R2), // 4
194  // back from first dac
195  // delay between the two samples in mono rendering
196  I_DELAY(dt2), // 6 + dt2
197  // mask the upper 8 bits
198  I_ANDI(R2, R1, 0xff00), // 6
199  // shift the upper bits to right and multiply by 2
200  I_RSHI(R2, R2, 8 - 1), // 6
201  // add start position of second dac table
202  I_ADDI(R2, R2, dacTableStart2), // 6
203  // jump to the dac opcode
204  I_BXR(R2), // 4
205  // here we get back from writing the second sample
206  // load 0x8080 as sample
207  I_MOVI(R1, 0x8080), // 6
208  // write 0x8080 in the sample buffer
209  I_ST(R1, R0, indexAddress), // 8
210  // increment the sample index
211  I_ADDI(R0, R0, 1), // 6
212  // if reached end of the buffer, jump relative to index reset
213  I_BGE(-16, totalSampleWords), // 4
214  // wait to get the right sample rate (2 cycles more to compensate the
215  // index reset)
216  I_DELAY((unsigned int)dt + 2), // 8 + dt
217  // if not, jump absolute to where index is written to memory
218  I_BXI(3) // 4
219  };
220  // write io and jump back another 12 + 4 + 12 + 4
221 
222  size_t load_addr = 0;
223  size_t size = sizeof(stereo) / sizeof(ulp_insn_t);
224  ulp_process_macros_and_load(load_addr, stereo, &size);
225  // this is how to get the opcodes
226  // for(int i = 0; i < size; i++)
227  // Serial.println(RTC_SLOW_MEM[i], HEX);
228 
229  // create DAC opcode tables
230  switch (activeDACs) {
231  case 1:
232  for (int i = 0; i < 256; i++) {
233  RTC_SLOW_MEM[dacTableStart1 + i * 2] = create_I_WR_REG(
234  RTC_IO_PAD_DAC1_REG, 19, 26, i); // dac1: 0x1D4C0121 | (i << 10)
235  RTC_SLOW_MEM[dacTableStart1 + 1 + i * 2] =
236  create_I_BXI(retAddress1); // 0x80000000 + retAddress1 * 4
237  RTC_SLOW_MEM[dacTableStart2 + i * 2] = create_I_WR_REG(
238  RTC_IO_PAD_DAC1_REG, 19, 26, i); // dac2: 0x1D4C0122 | (i << 10)
239  RTC_SLOW_MEM[dacTableStart2 + 1 + i * 2] =
240  create_I_BXI(retAddress2); // 0x80000000 + retAddress2 * 4
241  }
242  break;
243  case 2:
244  for (int i = 0; i < 256; i++) {
245  RTC_SLOW_MEM[dacTableStart1 + i * 2] = create_I_WR_REG(
246  RTC_IO_PAD_DAC2_REG, 19, 26, i); // dac1: 0x1D4C0121 | (i << 10)
247  RTC_SLOW_MEM[dacTableStart1 + 1 + i * 2] =
248  create_I_BXI(retAddress1); // 0x80000000 + retAddress1 * 4
249  RTC_SLOW_MEM[dacTableStart2 + i * 2] = create_I_WR_REG(
250  RTC_IO_PAD_DAC2_REG, 19, 26, i); // dac2: 0x1D4C0122 | (i << 10)
251  RTC_SLOW_MEM[dacTableStart2 + 1 + i * 2] =
252  create_I_BXI(retAddress2); // 0x80000000 + retAddress2 * 4
253  }
254  break;
255  case 3:
256  for (int i = 0; i < 256; i++) {
257  RTC_SLOW_MEM[dacTableStart1 + i * 2] = create_I_WR_REG(
258  RTC_IO_PAD_DAC1_REG, 19, 26, i); // dac1: 0x1D4C0121 | (i << 10)
259  RTC_SLOW_MEM[dacTableStart1 + 1 + i * 2] =
260  create_I_BXI(retAddress1); // 0x80000000 + retAddress1 * 4
261  RTC_SLOW_MEM[dacTableStart2 + i * 2] = create_I_WR_REG(
262  RTC_IO_PAD_DAC1_REG, 19, 26, i); // dac2: 0x1D4C0122 | (i << 10)
263  RTC_SLOW_MEM[dacTableStart2 + 1 + i * 2] =
264  create_I_BXI(retAddress2); // 0x80000000 + retAddress2 * 4
265  }
266  break;
267  }
268 
269  // set all samples to 128 (silence)
270  for (int i = 0; i < totalSampleWords; i++)
271  RTC_SLOW_MEM[bufferStart + i] = 0x8080;
272 
273  // start
274  RTC_SLOW_MEM[indexAddress] = 0;
275  ulp_run(0);
276 
277  // wait until ULP starts using samples and the index of output sample
278  // advances
279  while (RTC_SLOW_MEM[indexAddress] == 0)
280  delay(1);
281 
282  return true;
283  }
284 
285  bool writeFrame(int16_t sample[2]) {
286  TRACED();
287  int16_t ms[2];
288  ms[0] = sample[0];
289  ms[1] = sample[1];
290 
291  // TODO: needs improvement (counting is different here with respect to ULP
292  // code)
293  int currentSample = RTC_SLOW_MEM[indexAddress] & 0xffff;
294  int currentWord = currentSample >> 1;
295 
296  for (int i = 0; i < 2; i++) {
297  ms[i] = ((ms[i] >> 8) + 128) & 0xff;
298  }
299  if (!stereoOutput) // mix both channels
300  ms[0] =
301  (uint16_t)(((uint32_t)((int32_t)(ms[0]) + (int32_t)(ms[1])) >> 1) &
302  0xff);
303 
304  if (waitingOddSample) { // always true for stereo because samples are
305  // consumed in pairs
306  if (lastFilledWord !=
307  currentWord) // accept sample if writing index lastFilledWord has not
308  // reached index of output sample
309  {
310  unsigned int w;
311  if (stereoOutput) {
312  w = ms[0];
313  w |= ms[1] << 8;
314  } else {
315  w = bufferedOddSample;
316  w |= ms[0] << 8;
317  bufferedOddSample = 128;
318  waitingOddSample = false;
319  }
320  RTC_SLOW_MEM[bufferStart + lastFilledWord] = w;
321  lastFilledWord++;
322  if (lastFilledWord == totalSampleWords)
323  lastFilledWord = 0;
324  return true;
325  } else {
326  return false;
327  }
328  } else {
329  bufferedOddSample = ms[0];
330  waitingOddSample = true;
331  return true;
332  }
333  }
334 
335  uint32_t create_I_WR_REG(uint32_t reg, uint32_t low_bit, uint32_t high_bit,
336  uint32_t val) {
337  typedef union {
338  ulp_insn_t ulp_ins;
339  uint32_t ulp_bin;
340  } ulp_union;
341  const ulp_insn_t singleinstruction[] = {
342  I_WR_REG(reg, low_bit, high_bit, val)};
343  ulp_union recover_ins;
344  recover_ins.ulp_ins = singleinstruction[0];
345  return (uint32_t)(recover_ins.ulp_bin);
346  }
347 
348  uint32_t create_I_BXI(uint32_t imm_pc) {
349  typedef union {
350  ulp_insn_t ulp_ins;
351  uint32_t ulp_bin;
352  } ulp_union;
353  const ulp_insn_t singleinstruction[] = {I_BXI(imm_pc)};
354  ulp_union recover_ins;
355  recover_ins.ulp_ins = singleinstruction[0];
356  return (uint32_t)(recover_ins.ulp_bin);
357  }
358 };
359 
360 }
Outputs to ESP32 DAC through the ULP (Ultra Low Power coprocessor), freeing I2S for other uses....
Definition: AudioESP32ULP.h:35
void setMonoDAC(UlpDac dac)
Selects the DAC when we have a mono signal.
Definition: AudioESP32ULP.h:43
void setMinWriteBytes(int bytes)
Selects the limit for the availableForWrite to report the data.
Definition: AudioESP32ULP.h:48
bool begin(AudioInfo info)
Starts the processing. If the output is mono, we can determine the output pin by selecting DAC1 (gpio2...
Definition: AudioESP32ULP.h:53
Abstract Audio Output class.
Definition: AudioOutput.h:22
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition: AudioConfig.h:868
Basic Audio information which drives e.g. I2S.
Definition: AudioTypes.h:52
sample_rate_t sample_rate
Sample Rate: e.g 44100.
Definition: AudioTypes.h:55
uint16_t channels
Number of channels: 2=stereo, 1=mono.
Definition: AudioTypes.h:57
uint8_t bits_per_sample
Number of bits per sample (int16_t = 16 bits)
Definition: AudioTypes.h:59