arduino-audio-tools
Loading...
Searching...
No Matches
MDFEchoCancellation.h
1/* Copyright (C) 2003-2008 Jean-Marc Valin
2 * Copyright (C) 2024 Phil Schatzmann (Header-only adaptation)
3 *
4 * Echo canceller based on the MDF algorithm
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * 3. The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
23 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#pragma once
33
34#include <algorithm>
35#include <cmath>
36#include <cstdint>
37#include <cstdlib>
38#include <cstring>
39#include <vector>
40
41#include "MDFEchoCancellationConfig.h"
42#include "AudioTools/AudioLibs/AudioFFT.h"
43
44// Control requests
45#define ECHO_GET_FRAME_SIZE 3
46#define ECHO_SET_SAMPLING_RATE 24
47#define ECHO_GET_SAMPLING_RATE 25
48#define ECHO_GET_IMPULSE_RESPONSE_SIZE 27
49#define ECHO_GET_IMPULSE_RESPONSE 29
50
51// Helper macros for floating point mode
52#ifndef FIXED_POINT
53#define PSEUDOFLOAT(x) (x)
54#define FLOAT_MULT(a, b) ((a) * (b))
55#define FLOAT_AMULT(a, b) ((a) * (b))
56#define FLOAT_MUL32(a, b) ((a) * (b))
57#define FLOAT_DIV32(a, b) ((a) / (b))
58#define FLOAT_EXTRACT16(a) (a)
59#define FLOAT_EXTRACT32(a) (a)
60#define FLOAT_ADD(a, b) ((a) + (b))
61#define FLOAT_SUB(a, b) ((a) - (b))
62#define FLOAT_DIV32_FLOAT(a, b) ((a) / (b))
63#define FLOAT_MUL32U(a, b) ((a) * (b))
64#define FLOAT_SHL(a, b) (a)
65#define FLOAT_LT(a, b) ((a) < (b))
66#define FLOAT_GT(a, b) ((a) > (b))
67#define FLOAT_DIVU(a, b) ((a) / (b))
68#define FLOAT_SQRT(a) (sqrtf(a))
69
70// Math operation macros for floating point
71#define ABS(x) (((x) < 0) ? (-(x)) : (x))
72#define ABS16(x) (((x) < 0) ? (-(x)) : (x))
73#define ABS32(x) (((x) < 0) ? (-(x)) : (x))
74#define MIN16(a, b) ((a) < (b) ? (a) : (b))
75#define MAX16(a, b) ((a) > (b) ? (a) : (b))
76#define MIN32(a, b) ((a) < (b) ? (a) : (b))
77#define MAX32(a, b) ((a) > (b) ? (a) : (b))
78
79#define QCONST16(x, bits) (x)
80#define QCONST32(x, bits) (x)
81#define NEG16(x) (-(x))
82#define NEG32(x) (-(x))
83#define EXTRACT16(x) (x)
84#define EXTEND32(x) (x)
85#define SHR16(a, shift) (a)
86#define SHL16(a, shift) (a)
87#define SHR32(a, shift) (a)
88#define SHL32(a, shift) (a)
89#define PSHR16(a, shift) (a)
90#define PSHR32(a, shift) (a)
91#define VSHR32(a, shift) (a)
92#define SATURATE16(x, a) (x)
93#define SATURATE32(x, a) (x)
94#define ADD16(a, b) ((a) + (b))
95#define SUB16(a, b) ((a) - (b))
96#define ADD32(a, b) ((a) + (b))
97#define SUB32(a, b) ((a) - (b))
98#define MULT16_16(a, b) ((echo_word32_t)(a) * (echo_word32_t)(b))
99#define MAC16_16(c, a, b) ((c) + (echo_word32_t)(a) * (echo_word32_t)(b))
100#define MULT16_32_Q15(a, b) ((a) * (b))
101#define MULT16_16_P15(a, b) ((a) * (b))
102#define MULT16_32_P15(a, b) ((a) * (b))
103#define MULT16_16_Q15(a, b) ((a) * (b))
104#define DIV32_16(a, b) (((echo_word32_t)(a)) / (echo_word16_t)(b))
105#define DIV32(a, b) (((echo_word32_t)(a)) / (echo_word32_t)(b))
106#define WORD2INT(x) \
107 ((x) < -32767.5f \
108 ? -32768 \
109 : ((x) > 32766.5f ? 32767 : (echo_int16_t)floorf(.5f + (x))))
110#define MULT16_16_Q14(a, b) ((a) * (b))
111#define MULT16_16_Q13(a, b) ((a) * (b))
112#define MULT16_16_P13(a, b) ((a) * (b))
113#define MULT16_16_P14(a, b) ((a) * (b))
114#define MULT16_32_Q14(a, b) ((a) * (b))
115#define MAC16_32_Q15(c, a, b) ((c) + (a) * (b))
116
117#define TOP16(x) (x)
118#define WEIGHT_SHIFT 0
119
120#else
121// Fixed-point mode macros
122#define PSEUDOFLOAT(x) (echo_float_from_double(x))
123#define FLOAT_MULT(a, b) (echo_float_mult(a, b))
124#define FLOAT_MUL32U(a, b) (echo_float_mul32_u(a, b))
125#define FLOAT_ADD(a, b) (echo_float_add(a, b))
126#define FLOAT_SUB(a, b) (echo_float_sub(a, b))
127#define FLOAT_LT(a, b) (echo_float_lt(a, b))
128#define FLOAT_GT(a, b) (echo_float_gt(a, b))
129#define FLOAT_DIVU(a, b) (echo_float_divu(a, b))
130#define FLOAT_SQRT(a) (echo_float_sqrt(a))
131#define FLOAT_EXTRACT16(a) (echo_float_extract16(a))
132#define FLOAT_SHL(a, b) (echo_float_shl(a, b))
133
134// Math operation macros (same for fixed-point)
135#define ABS(x) (((x) < 0) ? (-(x)) : (x))
136#define ABS16(x) (((x) < 0) ? (-(x)) : (x))
137#define ABS32(x) (((x) < 0) ? (-(x)) : (x))
138#define MIN16(a, b) ((a) < (b) ? (a) : (b))
139#define MAX16(a, b) ((a) > (b) ? (a) : (b))
140#define MIN32(a, b) ((a) < (b) ? (a) : (b))
141#define MAX32(a, b) ((a) > (b) ? (a) : (b))
142
143#define WORD2INT(x) \
144 ((x) < -32767.5f \
145 ? -32768 \
146 : ((x) > 32766.5f ? 32767 : (echo_int16_t)floorf(.5f + (x))))
147
148#define TOP16(x) (x)
149#define WEIGHT_SHIFT 0
150
151#endif // !FIXED_POINT
152
153// Forward declaration for FFT interface
154namespace audio_tools {
155
156// Type definitions (must be before echo_float_t struct)
157typedef int16_t echo_int16_t;
158typedef uint16_t echo_uint16_t;
159typedef int32_t echo_int32_t;
160typedef uint32_t echo_uint32_t;
161
162typedef echo_int16_t echo_word16_t;
163typedef echo_int32_t echo_word32_t;
164typedef echo_word32_t echo_mem_t;
165
166#ifdef FIXED_POINT
167// Forward declarations for operators
168struct echo_float_t;
169inline echo_float_t echo_float_mult(echo_float_t a, echo_float_t b);
170inline echo_float_t echo_float_mul32_u(float a, float b);
171
172// Fixed-point type for pseudo-float operations
173typedef struct echo_float_t {
174 int16_t m; // Mantissa
175 int16_t e; // Exponent
176
177 // Constructor for initialization
178 constexpr echo_float_t(int16_t mantissa = 0, int16_t exponent = 0)
179 : m(mantissa), e(exponent) {}
180
181 // Operator overloads for arithmetic
182 inline echo_float_t operator*(const echo_float_t& other) const {
183 return echo_float_mult(*this, other);
184 }
185
186 inline echo_word32_t operator*(int16_t scalar) const {
187 // Result is a regular integer when multiplying float by int
188 int32_t result = ((int32_t)m * scalar);
189 if (e >= 14) {
190 return result << (e - 14);
191 } else {
192 return result >> (14 - e);
193 }
194 }
195
196 inline echo_word32_t operator*(int32_t scalar) const {
197 // Result is a regular integer when multiplying float by int
198 int64_t result = ((int64_t)m * scalar);
199 if (e >= 14) {
200 return result << (e - 14);
201 } else {
202 return result >> (14 - e);
203 }
204 }
205
206 friend inline echo_word32_t operator*(int32_t scalar, const echo_float_t& f) {
207 return f * scalar;
208 }
210
211// Fixed-point arithmetic helper functions
212inline echo_float_t echo_float_from_double(double x) {
213 echo_float_t r;
214 if (x == 0) {
215 r.m = 0;
216 r.e = 0;
217 return r;
218 }
219 int e = 0;
220 while (x >= 32768) { x *= 0.5; e++; }
221 while (x < 16384) { x *= 2.0; e--; }
222 r.m = (int16_t)x;
223 r.e = (int16_t)e;
224 return r;
225}
226
227inline echo_float_t echo_float_mult(echo_float_t a, echo_float_t b) {
228 echo_float_t r;
229 r.m = (int16_t)((((int32_t)a.m) * b.m) >> 15);
230 r.e = a.e + b.e + 15;
231 if (r.m > 0) {
232 while (r.m < 16384) { r.m <<= 1; r.e--; }
233 }
234 return r;
235}
236
237inline echo_float_t echo_float_mul32_u(float a, float b) {
238 return echo_float_from_double(a * b);
239}
240
241inline echo_float_t echo_float_add(echo_float_t a, echo_float_t b) {
242 if (a.e > b.e) {
243 int shift = a.e - b.e;
244 if (shift > 15) return a;
245 echo_float_t r = {(int16_t)(a.m + (b.m >> shift)), a.e};
246 return r;
247 } else {
248 int shift = b.e - a.e;
249 if (shift > 15) return b;
250 echo_float_t r = {(int16_t)((a.m >> shift) + b.m), b.e};
251 return r;
252 }
253}
254
255inline echo_float_t echo_float_sub(echo_float_t a, echo_float_t b) {
256 if (a.e > b.e) {
257 int shift = a.e - b.e;
258 if (shift > 15) return a;
259 echo_float_t r = {(int16_t)(a.m - (b.m >> shift)), a.e};
260 return r;
261 } else {
262 int shift = b.e - a.e;
263 if (shift > 15) {
264 echo_float_t r = {(int16_t)(-b.m), b.e};
265 return r;
266 }
267 echo_float_t r = {(int16_t)((a.m >> shift) - b.m), b.e};
268 return r;
269 }
270}
271
272inline bool echo_float_lt(echo_float_t a, echo_float_t b) {
273 if (a.e != b.e) return a.e < b.e;
274 return a.m < b.m;
275}
276
277inline bool echo_float_gt(echo_float_t a, echo_float_t b) {
278 if (a.e != b.e) return a.e > b.e;
279 return a.m > b.m;
280}
281
282inline echo_float_t echo_float_divu(echo_float_t a, echo_float_t b) {
283 echo_float_t r;
284 if (b.m == 0) {
285 r.m = 32767;
286 r.e = 15;
287 return r;
288 }
289 r.m = (int16_t)((((int32_t)a.m) << 15) / b.m);
290 r.e = a.e - b.e - 15;
291 if (r.m > 0) {
292 while (r.m < 16384) { r.m <<= 1; r.e--; }
293 }
294 return r;
295}
296
297inline echo_float_t echo_float_sqrt(echo_float_t x) {
298 double val = x.m * pow(2.0, x.e - 14);
299 return echo_float_from_double(sqrt(val));
300}
301
302inline int16_t echo_float_extract16(echo_float_t x) {
303 if (x.e > 0) {
304 return x.m << (x.e > 15 ? 15 : x.e);
305 } else {
306 return x.m >> (-x.e > 15 ? 15 : -x.e);
307 }
308}
309
310inline echo_float_t echo_float_shl(echo_float_t x, int bits) {
311 echo_float_t r = {x.m, (int16_t)(x.e + bits)};
312 return r;
313}
314
315
317static const echo_float_t MIN_LEAK = {164, -15};
318
320static const echo_float_t VAR1_SMOOTH = {11796, -15};
321
323static const echo_float_t VAR2_SMOOTH = {23674, -15};
324
326static const echo_float_t VAR1_UPDATE = {16384, -15};
327
329static const echo_float_t VAR2_UPDATE = {8192, -15};
330
332static const echo_float_t VAR_BACKTRACK = {16384, 2};
333
334#else
335// Floating-point mode
336
338static const float MIN_LEAK = 0.005f;
339
341static const float VAR1_SMOOTH = 0.36f;
342
344static const float VAR2_SMOOTH = 0.7225f;
345
347static const float VAR1_UPDATE = 0.5f;
348
350static const float VAR2_UPDATE = 0.25f;
351
353static const float VAR_BACKTRACK = 4.0f;
354
355#endif // FIXED_POINT
356
357
363// Floating point type definition (for non-FIXED_POINT mode)
364#ifndef FIXED_POINT
365typedef float echo_float_t;
366#endif
367
381 int window_size;
382 int M;
383 int cancel_count;
384 int adapted;
385 int saturated;
386 int screwed_up;
387 int C;
388 int K;
389 echo_int32_t sampling_rate;
390 echo_word16_t spec_average;
391 echo_word16_t beta0;
392 echo_word16_t beta_max;
393 echo_word32_t sum_adapt;
394 echo_word16_t leak_estimate;
395
396 echo_word16_t* e; /* scratch */
397 echo_word16_t* x; /* Far-end input buffer (2N) */
398 echo_word16_t* X; /* Far-end buffer (M+1 frames) in frequency domain */
399 echo_word16_t* input; /* scratch */
400 echo_word16_t* y; /* scratch */
401 echo_word16_t* last_y;
402 echo_word16_t* Y; /* scratch */
403 echo_word16_t* E;
404 echo_word32_t* PHI; /* scratch */
405 echo_word32_t* W; /* (Background) filter weights */
406#ifdef TWO_PATH
407 echo_word16_t* foreground; /* Foreground filter weights */
408 echo_word32_t
409 Davg1; /* 1st recursive average of the residual power difference */
410 echo_word32_t
411 Davg2; /* 2nd recursive average of the residual power difference */
412 echo_float_t Dvar1; /* Estimated variance of 1st estimator */
413 echo_float_t Dvar2; /* Estimated variance of 2nd estimator */
414#endif
415 echo_word32_t* power; /* Power of the far-end signal */
416 echo_float_t* power_1; /* Inverse power of far-end */
417 echo_word16_t* wtmp; /* scratch */
418 echo_word32_t* Rf; /* scratch */
419 echo_word32_t* Yf; /* scratch */
420 echo_word32_t* Xf; /* scratch */
421 echo_word32_t* Eh;
422 echo_word32_t* Yh;
423 echo_float_t Pey;
424 echo_float_t Pyy;
425 echo_word16_t* window;
426 echo_word16_t* prop;
427 void* fft_table;
428 echo_word16_t *memX, *memD, *memE;
429 echo_word16_t preemph;
430 echo_word16_t notch_radius;
431 echo_mem_t* notch_mem;
432
433 echo_int16_t* play_buf;
434 int play_buf_pos;
435 int play_buf_started;
436};
437
438typedef struct EchoState_ EchoState;
439
450template <typename Allocator = std::allocator<uint8_t>>
451struct fft_state {
452 using FloatAllocator = typename std::allocator_traits<Allocator>::template rebind_alloc<float>;
455 int N;
456 std::vector<float, FloatAllocator> temp_real;
457 std::vector<float, FloatAllocator> temp_img;
465 fft_state(int size, AudioFFTBase* drv, const Allocator& alloc = Allocator())
466 : N(size), driver(drv),
467 temp_real(FloatAllocator(alloc)),
468 temp_img(FloatAllocator(alloc)) {
469 temp_real.resize(size);
470 temp_img.resize(size);
471 }
472};
473
507template <typename Allocator = std::allocator<uint8_t>>
509 public:
515 MDFEchoCancellation(int filterLength, AudioFFTBase& fftDriver, const Allocator& alloc = Allocator())
516 : fft_driver(&fftDriver), allocator(alloc),
517 filter_length(filterLength), nb_mic(1), nb_speakers(1) {}
518
526 MDFEchoCancellation(int filterLength, int nbMic, int nbSpeakers,
527 AudioFFTBase& fftDriver, const Allocator& alloc = Allocator())
528 : fft_driver(&fftDriver), allocator(alloc),
529 filter_length(filterLength), nb_mic(nbMic), nb_speakers(nbSpeakers) {}
530
533 if (!state) return;
534
535 if (state->fft_table) echo_fft_destroy<Allocator>(state->fft_table);
536
537 echoFree(state->e);
538 echoFree(state->x);
539 echoFree(state->input);
540 echoFree(state->y);
541 echoFree(state->last_y);
542 echoFree(state->Yf);
543 echoFree(state->Rf);
544 echoFree(state->Xf);
545 echoFree(state->Yh);
546 echoFree(state->Eh);
547 echoFree(state->X);
548 echoFree(state->Y);
549 echoFree(state->E);
550 echoFree(state->W);
551#ifdef TWO_PATH
552 echoFree(state->foreground);
553#endif
554 echoFree(state->PHI);
555 echoFree(state->power);
556 echoFree(state->power_1);
557 echoFree(state->window);
558 echoFree(state->prop);
559 echoFree(state->wtmp);
560 echoFree(state->memX);
561 echoFree(state->memD);
562 echoFree(state->memE);
563 echoFree(state->notch_mem);
564 echoFree(state->play_buf);
565 delete state;
566 state = nullptr;
567 }
568
575 void cancel(const echo_int16_t* rec, const echo_int16_t* play,
576 echo_int16_t* out) {
578 echoCancellationImpl(state, rec, play, out);
579 }
580
585 void capture(const echo_int16_t* rec, echo_int16_t* out) {
587 state->play_buf_started = 1;
588 if (state->play_buf_pos >= state->frame_size) {
589 echoCancellationImpl(state, rec, state->play_buf, out);
590 state->play_buf_pos -= state->frame_size;
591 std::memmove(state->play_buf,
592 &state->play_buf[state->frame_size],
593 state->play_buf_pos * sizeof(echo_int16_t));
594 } else {
595 echoWarning("No playback frame available");
596 if (state->play_buf_pos != 0) {
597 echoWarning("Internal playback buffer corruption");
598 state->play_buf_pos = 0;
599 }
600 for (int i = 0; i < state->frame_size; i++) out[i] = rec[i];
601 }
602 }
603
607 void playback(const echo_int16_t* play) {
609 if (!state->play_buf_started) {
610 echoWarning("Discarded first playback frame");
611 return;
612 }
613 if (state->play_buf_pos <= PLAYBACK_DELAY * state->frame_size) {
614 for (int i = 0; i < state->frame_size; i++)
615 state->play_buf[state->play_buf_pos + i] = play[i];
616 state->play_buf_pos += state->frame_size;
617 if (state->play_buf_pos <= (PLAYBACK_DELAY - 1) * state->frame_size) {
618 echoWarning("Auto-filling buffer");
619 for (int i = 0; i < state->frame_size; i++)
620 state->play_buf[state->play_buf_pos + i] = play[i];
621 state->play_buf_pos += state->frame_size;
622 }
623 } else {
624 echoWarning("Had to discard playback frame");
625 }
626 }
627
629 void reset() {
631 int N = state->window_size;
632 int M = state->M;
633 int C = state->C;
634 int K = state->K;
635
636 state->cancel_count = 0;
637 state->screwed_up = 0;
638
639 for (int i = 0; i < N * M; i++) state->W[i] = 0;
640#ifdef TWO_PATH
641 for (int i = 0; i < N * M; i++) state->foreground[i] = 0;
642#endif
643 for (int i = 0; i < N * (M + 1); i++) state->X[i] = 0;
644 for (int i = 0; i <= state->frame_size; i++) {
645 state->power[i] = 0;
646 state->power_1[i] = FLOAT_ONE;
647 state->Eh[i] = 0;
648 state->Yh[i] = 0;
649 }
650 for (int i = 0; i < state->frame_size; i++) state->last_y[i] = 0;
651 for (int i = 0; i < N * C; i++) state->E[i] = 0;
652 for (int i = 0; i < N * K; i++) state->x[i] = 0;
653 for (int i = 0; i < 2 * C; i++) state->notch_mem[i] = 0;
654 for (int i = 0; i < C; i++) state->memD[i] = state->memE[i] = 0;
655 for (int i = 0; i < K; i++) state->memX[i] = 0;
656
657 state->saturated = 0;
658 state->adapted = 0;
659 state->sum_adapt = 0;
660 state->Pey = state->Pyy = FLOAT_ONE;
661#ifdef TWO_PATH
662 state->Davg1 = state->Davg2 = 0;
663 state->Dvar1 = state->Dvar2 = FLOAT_ZERO;
664#endif
665 for (int i = 0; i < 3 * state->frame_size; i++) state->play_buf[i] = 0;
666 state->play_buf_pos = PLAYBACK_DELAY * state->frame_size;
667 state->play_buf_started = 0;
668 }
669
675 int control(int request, void* ptr) {
676 switch (request) {
677 case ECHO_GET_FRAME_SIZE:
678 (*(int*)ptr) = state->frame_size;
679 break;
680 case ECHO_SET_SAMPLING_RATE:
681 state->sampling_rate = (*(int*)ptr);
682 state->spec_average = state->frame_size / (float)state->sampling_rate;
683 state->beta0 = (2.0f * state->frame_size) / state->sampling_rate;
684 state->beta_max = (.5f * state->frame_size) / state->sampling_rate;
685 if (state->sampling_rate < 12000)
686 state->notch_radius = .9f;
687 else if (state->sampling_rate < 24000)
688 state->notch_radius = .982f;
689 else
690 state->notch_radius = .992f;
691 break;
692 case ECHO_GET_SAMPLING_RATE:
693 (*(int*)ptr) = state->sampling_rate;
694 break;
695 case ECHO_GET_IMPULSE_RESPONSE_SIZE:
696 *((echo_int32_t*)ptr) = state->M * state->frame_size;
697 break;
698 case ECHO_GET_IMPULSE_RESPONSE: {
699 int M = state->M, N = state->window_size, n = state->frame_size;
700 echo_int32_t* filt = (echo_int32_t*)ptr;
701 for (int j = 0; j < M; j++) {
702 echo_ifft<Allocator>(state->fft_table, &state->W[j * N], state->wtmp);
703 for (int i = 0; i < n; i++) filt[j * n + i] = 32767 * state->wtmp[i];
704 }
705 } break;
706 default:
707 return -1;
708 }
709 return 0;
710 }
711
713 int getFrameSize() { return state->frame_size; }
714
718 void setSamplingRate(int rate) { control(ECHO_SET_SAMPLING_RATE, &rate); }
719
722
725
729 void getImpulseResponse(echo_int32_t* response) {
730 control(ECHO_GET_IMPULSE_RESPONSE, response);
731 }
732
736 void setFilterLength(int len) {
737 if (initialized) {
738 echoWarning("Cannot change filter length after initialization");
739 return;
740 }
741 filter_length = len;
742 }
743
746
750 void setMicChannels(int num) {
751 if (initialized) {
752 echoWarning("Cannot change mic channels after initialization");
753 return;
754 }
755 nb_mic = num;
756 }
757
759 int getMicChannels() { return nb_mic; }
760
764 void setSpeakerChannels(int num) {
765 if (initialized) {
766 echoWarning("Cannot change speaker channels after initialization");
767 return;
768 }
769 nb_speakers = num;
770 }
771
774
778 void setFFTDriver(AudioFFTBase& fftDriver) {
779 if (initialized) {
780 echoWarning("Cannot change FFT driver after initialization");
781 return;
782 }
783 fft_driver = &fftDriver;
784 }
785
787 EchoState* getState() { return state; }
788
789 protected:
790 EchoState* state = nullptr;
794 int nb_mic;
796 bool initialized = false;
802 if (initialized) return;
803
804 int frameSize = fft_driver->config().length;
806 if (state && fft_driver) {
807 state->fft_table =
808 echo_fft_init<Allocator>(state->window_size, fft_driver, allocator);
809 }
810 initialized = true;
811 }
812
820 template <typename T>
821 T* echoAlloc(size_t count) {
822 typename std::allocator_traits<Allocator>::template rebind_alloc<T> alloc(allocator);
823 T* ptr = alloc.allocate(count);
824 // Initialize to zero (equivalent to calloc behavior)
825 for (size_t i = 0; i < count; ++i) {
826 std::allocator_traits<typename std::allocator_traits<Allocator>::template rebind_alloc<T>>::construct(alloc, ptr + i);
827 }
828 return ptr;
829 }
830
838 template <typename T>
839 void echoFree(T* ptr, size_t count = 0) {
840 if (ptr) {
841 typename std::allocator_traits<Allocator>::template rebind_alloc<T> alloc(allocator);
842 // Note: count is not used since we can't track allocation size
843 // This is acceptable for POD types as we're using with speexdsp
844 alloc.deallocate(ptr, count);
845 }
846 }
847
848 inline void echoWarning(const char* str) {
849 LOGW("EchoCanceller Warning: %s", str);
850 }
851
852 inline void echoFatal(const char* str) {
853 LOGE("EchoCanceller Error: %s", str);
854 }
855
856 inline float spxSqrt(float x) { return sqrtf(x); }
857
858 inline echo_int16_t spxIlog2(echo_uint32_t x) {
859 int r = 0;
860 if (x >= (echo_int32_t)65536) {
861 x >>= 16;
862 r += 16;
863 }
864 if (x >= 256) {
865 x >>= 8;
866 r += 8;
867 }
868 if (x >= 16) {
869 x >>= 4;
870 r += 4;
871 }
872 if (x >= 4) {
873 x >>= 2;
874 r += 2;
875 }
876 if (x >= 2) {
877 r += 1;
878 }
879 return r;
880 }
881
887 inline float spxExp(float x) { return expf(x); }
888
894 inline float spxCos(float x) { return cosf(x); }
895
905 inline void filterDcNotch16(const echo_int16_t* in, echo_word16_t radius,
906 echo_word16_t* out, int len, echo_mem_t* mem,
907 int stride) {
908 echo_word16_t den2 = radius * radius + .7f * (1 - radius) * (1 - radius);
909 for (int i = 0; i < len; i++) {
910 echo_word16_t vin = in[i * stride];
911 echo_word32_t vout = mem[0] + vin;
912 mem[0] = mem[1] + 2 * (-vin + radius * vout);
913 mem[1] = vin - den2 * vout;
914 out[i] = radius * vout;
915 }
916 }
917
925 inline echo_word32_t mdfInnerProd(const echo_word16_t* x, const echo_word16_t* y,
926 int len) {
927 float sum = 0;
928 for (int i = 0; i < len; i++) {
929 sum += x[i] * y[i];
930 }
931 return sum;
932 }
933
940 inline void powerSpectrum(const echo_word16_t* X, echo_word32_t* ps, int N) {
941 ps[0] = X[0] * X[0];
942 for (int i = 1, j = 1; i < N - 1; i += 2, j++) {
943 ps[j] = X[i] * X[i] + X[i + 1] * X[i + 1];
944 }
945 ps[N / 2] = X[N - 1] * X[N - 1];
946 }
947
954 inline void powerSpectrumAccum(const echo_word16_t* X, echo_word32_t* ps,
955 int N) {
956 ps[0] += X[0] * X[0];
957 for (int i = 1, j = 1; i < N - 1; i += 2, j++) {
958 ps[j] += X[i] * X[i] + X[i + 1] * X[i + 1];
959 }
960 ps[N / 2] += X[N - 1] * X[N - 1];
961 }
962
971 inline void spectralMulAccum(const echo_word16_t* X, const echo_word32_t* Y,
972 echo_word16_t* acc, int N, int M) {
973 for (int i = 0; i < N; i++) acc[i] = 0;
974
975 for (int j = 0; j < M; j++) {
976 acc[0] += X[0] * Y[0];
977 for (int i = 1; i < N - 1; i += 2) {
978 acc[i] += (X[i] * Y[i] - X[i + 1] * Y[i + 1]);
979 acc[i + 1] += (X[i + 1] * Y[i] + X[i] * Y[i + 1]);
980 }
981 acc[N - 1] += X[N - 1] * Y[N - 1];
982 X += N;
983 Y += N;
984 }
985 }
986
996 inline void weightedSpectralMulConj(const echo_float_t* w, const echo_float_t p,
997 const echo_word16_t* X,
998 const echo_word16_t* Y, echo_word32_t* prod,
999 int N) {
1000 echo_float_t W;
1001 W = p * w[0];
1002 prod[0] = W * ((int32_t)X[0] * Y[0]);
1003 for (int i = 1, j = 1; i < N - 1; i += 2, j++) {
1004 W = p * w[j];
1005 prod[i] = W * ((int32_t)(X[i] * Y[i] + X[i + 1] * Y[i + 1]));
1006 prod[i + 1] = W * ((int32_t)(-X[i + 1] * Y[i] + X[i] * Y[i + 1]));
1007 }
1008 W = p * w[N / 2];
1009 prod[N - 1] = W * ((int32_t)X[N - 1] * Y[N - 1]);
1010 }
1011
1020 inline void mdfAdjustProp(const echo_word32_t* W, int N, int M, int P,
1021 echo_word16_t* prop) {
1022 echo_word16_t max_sum = 1;
1023 echo_word32_t prop_sum = 1;
1024
1025 for (int i = 0; i < M; i++) {
1026 echo_word32_t tmp = 1;
1027 for (int p = 0; p < P; p++) {
1028 for (int j = 0; j < N; j++) {
1029 float val = W[p * N * M + i * N + j];
1030 tmp += val * val;
1031 }
1032 }
1033 prop[i] = spxSqrt(tmp);
1034 if (prop[i] > max_sum) max_sum = prop[i];
1035 }
1036
1037 for (int i = 0; i < M; i++) {
1038 prop[i] += 0.1f * max_sum;
1039 prop_sum += prop[i];
1040 }
1041
1042 for (int i = 0; i < M; i++) {
1043 prop[i] = 0.99f * prop[i] / prop_sum;
1044 }
1045 }
1046
1056 int nb_speakers) {
1057 int N = frame_size * 2;
1058 int M = (filter_length + frame_size - 1) / frame_size;
1059 int C = nb_mic;
1060 int K = nb_speakers;
1061
1062 EchoState* st = new EchoState();
1063 if (!st) return nullptr;
1064
1065 st->K = K;
1066 st->C = C;
1067 st->frame_size = frame_size;
1068 st->window_size = N;
1069 st->M = M;
1070 st->cancel_count = 0;
1071 st->sum_adapt = 0;
1072 st->saturated = 0;
1073 st->screwed_up = 0;
1074 st->sampling_rate = 8000;
1075 st->spec_average = st->frame_size / (float)st->sampling_rate;
1076 st->beta0 = (2.0f * st->frame_size) / st->sampling_rate;
1077 st->beta_max = (.5f * st->frame_size) / st->sampling_rate;
1078 st->leak_estimate = 0;
1079
1080 // Allocate buffers
1081 st->e = echoAlloc<echo_word16_t>(C * N);
1082 st->x = echoAlloc<echo_word16_t>(K * N);
1083 st->input = echoAlloc<echo_word16_t>(C * st->frame_size);
1084 st->y = echoAlloc<echo_word16_t>(C * N);
1085 st->last_y = echoAlloc<echo_word16_t>(C * N);
1086 st->Yf = echoAlloc<echo_word32_t>(st->frame_size + 1);
1087 st->Rf = echoAlloc<echo_word32_t>(st->frame_size + 1);
1088 st->Xf = echoAlloc<echo_word32_t>(st->frame_size + 1);
1089 st->Yh = echoAlloc<echo_word32_t>(st->frame_size + 1);
1090 st->Eh = echoAlloc<echo_word32_t>(st->frame_size + 1);
1091 st->X = echoAlloc<echo_word16_t>(K * (M + 1) * N);
1092 st->Y = echoAlloc<echo_word16_t>(C * N);
1093 st->E = echoAlloc<echo_word16_t>(C * N);
1094 st->W = echoAlloc<echo_word32_t>(C * K * M * N);
1095
1096#ifdef TWO_PATH
1097 st->foreground = echoAlloc<echo_word16_t>(M * N * C * K);
1098#endif
1099
1100 st->PHI = echoAlloc<echo_word32_t>(N);
1101 st->power = echoAlloc<echo_word32_t>(frame_size + 1);
1102 st->power_1 = echoAlloc<echo_float_t>(frame_size + 1);
1103 st->window = echoAlloc<echo_word16_t>(N);
1104 st->prop = echoAlloc<echo_word16_t>(M);
1105 st->wtmp = echoAlloc<echo_word16_t>(N);
1106
1107 // Initialize window
1108 for (int i = 0; i < N; i++)
1109 st->window[i] = .5f - .5f * cosf(2 * M_PI * i / N);
1110
1111 // Initialize power_1
1112 for (int i = 0; i <= st->frame_size; i++) st->power_1[i] = FLOAT_ONE;
1113
1114 // Initialize W
1115 for (int i = 0; i < N * M * K * C; i++) st->W[i] = 0;
1116
1117 // Initialize prop
1118 {
1119 echo_word32_t sum = 0;
1120 float decay = expf(-2.4f / M);
1121 st->prop[0] = .7f;
1122 sum = st->prop[0];
1123 for (int i = 1; i < M; i++) {
1124 st->prop[i] = st->prop[i - 1] * decay;
1125 sum += st->prop[i];
1126 }
1127 for (int i = M - 1; i >= 0; i--) {
1128 st->prop[i] = .8f * st->prop[i] / sum;
1129 }
1130 }
1131
1132 st->memX = echoAlloc<echo_word16_t>(K);
1133 st->memD = echoAlloc<echo_word16_t>(C);
1134 st->memE = echoAlloc<echo_word16_t>(C);
1135 st->preemph = .9f;
1136
1137 if (st->sampling_rate < 12000)
1138 st->notch_radius = .9f;
1139 else if (st->sampling_rate < 24000)
1140 st->notch_radius = .982f;
1141 else
1142 st->notch_radius = .992f;
1143
1144 st->notch_mem = echoAlloc<echo_mem_t>(2 * C);
1145 st->adapted = 0;
1146 st->Pey = st->Pyy = FLOAT_ONE;
1147
1148#ifdef TWO_PATH
1149 st->Davg1 = st->Davg2 = 0;
1150 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
1151#endif
1152
1153 st->play_buf =
1154 echoAlloc<echo_int16_t>(K * (PLAYBACK_DELAY + 1) * st->frame_size);
1155 st->play_buf_pos = PLAYBACK_DELAY * st->frame_size;
1156 st->play_buf_started = 0;
1157
1158 st->fft_table = nullptr;
1159
1160 return st;
1161 }
1162
1179 inline void echoCancellationImpl(EchoState* st, const echo_int16_t* in,
1180 const echo_int16_t* far_end,
1181 echo_int16_t* out) {
1182 int N = st->window_size;
1183 int M = st->M;
1184 int C = st->C;
1185 int K = st->K;
1186
1187 st->cancel_count++;
1188 float ss = .35f / M;
1189 float ss_1 = 1 - ss;
1190
1191 // Apply notch filter and pre-emphasis to input
1192 for (int chan = 0; chan < C; chan++) {
1193 filterDcNotch16(in + chan, st->notch_radius,
1194 st->input + chan * st->frame_size, st->frame_size,
1195 st->notch_mem + 2 * chan, C);
1196
1197 for (int i = 0; i < st->frame_size; i++) {
1198 echo_word32_t tmp32 =
1199 st->input[chan * st->frame_size + i] - st->preemph * st->memD[chan];
1200 st->memD[chan] = st->input[chan * st->frame_size + i];
1201 st->input[chan * st->frame_size + i] = tmp32;
1202 }
1203 }
1204
1205 // Process far-end signal
1206 for (int speak = 0; speak < K; speak++) {
1207 std::memmove(&st->x[speak * N],
1208 &st->x[speak * N + st->frame_size],
1209 st->frame_size * sizeof(echo_word16_t));
1210 for (int i = 0; i < st->frame_size; i++) {
1211 echo_word32_t tmp32 =
1212 far_end[i * K + speak] - st->preemph * st->memX[speak];
1213 st->x[speak * N + i + st->frame_size] = tmp32;
1214 st->memX[speak] = far_end[i * K + speak];
1215 }
1216 }
1217
1218 // Shift memory and compute FFT of far-end
1219 for (int speak = 0; speak < K; speak++) {
1220 for (int j = M - 1; j >= 0; j--) {
1221 std::memmove(&st->X[(j + 1) * N * K + speak * N],
1222 &st->X[j * N * K + speak * N],
1223 N * sizeof(echo_word16_t));
1224 }
1225 echo_fft<Allocator>(st->fft_table, st->x + speak * N, &st->X[speak * N]);
1226 }
1227
1228 // Compute power spectrum of far-end
1229 echo_word32_t Sxx = 0;
1230 for (int speak = 0; speak < K; speak++) {
1231 Sxx += mdfInnerProd(st->x + speak * N + st->frame_size,
1232 st->x + speak * N + st->frame_size, st->frame_size);
1233 powerSpectrumAccum(st->X + speak * N, st->Xf, N);
1234 }
1235
1236 // Compute foreground filter output and residual
1237 echo_word32_t Sff = 0;
1238 for (int chan = 0; chan < C; chan++) {
1239#ifdef TWO_PATH
1240 spectralMulAccum(st->X, st->foreground + chan * N * K * M,
1241 st->Y + chan * N, N, M * K);
1242 echo_ifft<Allocator>(st->fft_table, st->Y + chan * N, st->e + chan * N);
1243 for (int i = 0; i < st->frame_size; i++)
1244 st->e[chan * N + i] = st->input[chan * st->frame_size + i] -
1245 st->e[chan * N + i + st->frame_size];
1246 Sff += mdfInnerProd(st->e + chan * N, st->e + chan * N, st->frame_size);
1247#endif
1248 }
1249
1250 // Adjust proportional adaptation
1251 if (st->adapted) mdfAdjustProp(st->W, N, M, C * K, st->prop);
1252
1253 // Compute weight gradient
1254 if (st->saturated == 0) {
1255 for (int chan = 0; chan < C; chan++) {
1256 for (int speak = 0; speak < K; speak++) {
1257 for (int j = M - 1; j >= 0; j--) {
1258 weightedSpectralMulConj(st->power_1, st->prop[j],
1259 &st->X[(j + 1) * N * K + speak * N],
1260 st->E + chan * N, st->PHI, N);
1261 for (int i = 0; i < N; i++)
1262 st->W[chan * N * K * M + j * N * K + speak * N + i] += st->PHI[i];
1263 }
1264 }
1265 }
1266 } else {
1267 st->saturated--;
1268 }
1269
1270 // Update weights (AUMDF)
1271 for (int chan = 0; chan < C; chan++) {
1272 for (int speak = 0; speak < K; speak++) {
1273 for (int j = 0; j < M; j++) {
1274 if (j == 0 || st->cancel_count % (M - 1) == j - 1) {
1275 echo_ifft<Allocator>(
1276 st->fft_table, &st->W[chan * N * K * M + j * N * K + speak * N],
1277 st->wtmp);
1278 for (int i = st->frame_size; i < N; i++) st->wtmp[i] = 0;
1279 echo_fft<Allocator>(
1280 st->fft_table, st->wtmp,
1281 &st->W[chan * N * K * M + j * N * K + speak * N]);
1282 }
1283 }
1284 }
1285 }
1286
1287 // Initialize spectrum buffers
1288 for (int i = 0; i <= st->frame_size; i++)
1289 st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
1290
1291 echo_word32_t Dbf = 0;
1292 echo_word32_t See = 0;
1293
1294#ifdef TWO_PATH
1295 // Compute background filter output
1296 for (int chan = 0; chan < C; chan++) {
1297 spectralMulAccum(st->X, st->W + chan * N * K * M, st->Y + chan * N, N,
1298 M * K);
1299 echo_ifft<Allocator>(st->fft_table, st->Y + chan * N, st->y + chan * N);
1300 for (int i = 0; i < st->frame_size; i++)
1301 st->e[chan * N + i] = st->e[chan * N + i + st->frame_size] -
1302 st->y[chan * N + i + st->frame_size];
1303 Dbf +=
1304 10 + mdfInnerProd(st->e + chan * N, st->e + chan * N, st->frame_size);
1305 for (int i = 0; i < st->frame_size; i++)
1306 st->e[chan * N + i] = st->input[chan * st->frame_size + i] -
1307 st->y[chan * N + i + st->frame_size];
1308 See += mdfInnerProd(st->e + chan * N, st->e + chan * N, st->frame_size);
1309 }
1310#endif
1311
1312#ifndef TWO_PATH
1313 Sff = See;
1314#endif
1315
1316#ifdef TWO_PATH
1317 // Two-path filter logic
1318 st->Davg1 = .6f * st->Davg1 + .4f * (Sff - See);
1319 st->Davg2 = .85f * st->Davg2 + .15f * (Sff - See);
1320 st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1),
1321 FLOAT_MUL32U(.4f * Sff, .4f * Dbf));
1322 st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2),
1323 FLOAT_MUL32U(.15f * Sff, .15f * Dbf));
1324
1325 int update_foreground = 0;
1326 if (FLOAT_GT(FLOAT_MUL32U(Sff - See, fabsf(Sff - See)),
1327 FLOAT_MUL32U(Sff, Dbf)))
1328 update_foreground = 1;
1329 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, fabsf(st->Davg1)),
1330 FLOAT_MULT(VAR1_UPDATE, st->Dvar1)))
1331 update_foreground = 1;
1332 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, fabsf(st->Davg2)),
1333 FLOAT_MULT(VAR2_UPDATE, st->Dvar2)))
1334 update_foreground = 1;
1335
1336 if (update_foreground) {
1337 st->Davg1 = st->Davg2 = 0;
1338 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
1339 std::memcpy(st->foreground, st->W, N * M * C * K * sizeof(echo_word16_t));
1340 for (int chan = 0; chan < C; chan++)
1341 for (int i = 0; i < st->frame_size; i++)
1342 st->e[chan * N + i + st->frame_size] =
1343 st->window[i + st->frame_size] *
1344 st->e[chan * N + i + st->frame_size] +
1345 st->window[i] * st->y[chan * N + i + st->frame_size];
1346 } else {
1347 int reset_background = 0;
1348 if (FLOAT_GT(FLOAT_MUL32U(-(Sff - See), fabsf(Sff - See)),
1349 FLOAT_MULT(VAR_BACKTRACK, FLOAT_MUL32U(Sff, Dbf))))
1350 reset_background = 1;
1351 if (FLOAT_GT(FLOAT_MUL32U(-st->Davg1, fabsf(st->Davg1)),
1352 FLOAT_MULT(VAR_BACKTRACK, st->Dvar1)))
1353 reset_background = 1;
1354 if (FLOAT_GT(FLOAT_MUL32U(-st->Davg2, fabsf(st->Davg2)),
1355 FLOAT_MULT(VAR_BACKTRACK, st->Dvar2)))
1356 reset_background = 1;
1357
1358 if (reset_background) {
1359 std::memcpy(st->W, st->foreground, N * M * C * K * sizeof(echo_word32_t));
1360 for (int chan = 0; chan < C; chan++) {
1361 for (int i = 0; i < st->frame_size; i++)
1362 st->y[chan * N + i + st->frame_size] =
1363 st->e[chan * N + i + st->frame_size];
1364 for (int i = 0; i < st->frame_size; i++)
1365 st->e[chan * N + i] = st->input[chan * st->frame_size + i] -
1366 st->y[chan * N + i + st->frame_size];
1367 }
1368 See = Sff;
1369 st->Davg1 = st->Davg2 = 0;
1370 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
1371 }
1372 }
1373#endif
1374
1375 echo_word32_t Sey = 0, Syy = 0, Sdd = 0;
1376 for (int chan = 0; chan < C; chan++) {
1377 // Compute output with de-emphasis
1378 for (int i = 0; i < st->frame_size; i++) {
1379 echo_word32_t tmp_out;
1380#ifdef TWO_PATH
1381 tmp_out = st->input[chan * st->frame_size + i] -
1382 st->e[chan * N + i + st->frame_size];
1383#else
1384 tmp_out = st->input[chan * st->frame_size + i] -
1385 st->y[chan * N + i + st->frame_size];
1386#endif
1387 tmp_out = tmp_out + st->preemph * st->memE[chan];
1388 if (in[i * C + chan] <= -32000 || in[i * C + chan] >= 32000) {
1389 if (st->saturated == 0) st->saturated = 1;
1390 }
1391 out[i * C + chan] = WORD2INT(tmp_out);
1392 st->memE[chan] = tmp_out;
1393 }
1394
1395 // Prepare error signal for filter update
1396 for (int i = 0; i < st->frame_size; i++) {
1397 st->e[chan * N + i + st->frame_size] = st->e[chan * N + i];
1398 st->e[chan * N + i] = 0;
1399 }
1400
1401 // Compute correlations
1402 Sey += mdfInnerProd(st->e + chan * N + st->frame_size,
1403 st->y + chan * N + st->frame_size, st->frame_size);
1404 Syy += mdfInnerProd(st->y + chan * N + st->frame_size,
1405 st->y + chan * N + st->frame_size, st->frame_size);
1406 Sdd += mdfInnerProd(st->input + chan * st->frame_size,
1407 st->input + chan * st->frame_size, st->frame_size);
1408
1409 // Convert error to frequency domain
1410 echo_fft<Allocator>(st->fft_table, st->e + chan * N, st->E + chan * N);
1411
1412 for (int i = 0; i < st->frame_size; i++) st->y[i + chan * N] = 0;
1413 echo_fft<Allocator>(st->fft_table, st->y + chan * N, st->Y + chan * N);
1414
1415 // Compute power spectra
1416 powerSpectrumAccum(st->E + chan * N, st->Rf, N);
1417 powerSpectrumAccum(st->Y + chan * N, st->Yf, N);
1418 }
1419
1420 // Sanity checks
1421 if (!(Syy >= 0 && Sxx >= 0 && See >= 0) ||
1422 !(Sff < N * 1e9f && Syy < N * 1e9f && Sxx < N * 1e9f)) {
1423 st->screwed_up += 50;
1424 for (int i = 0; i < st->frame_size * C; i++) out[i] = 0;
1425 } else if (Sff > Sdd + N * 10000.0f) {
1426 st->screwed_up++;
1427 } else {
1428 st->screwed_up = 0;
1429 }
1430
1431 if (st->screwed_up >= 50) {
1432 echoWarning("Echo canceller reset");
1433 reset();
1434 return;
1435 }
1436
1437 See = MAX32(See, N * 100.0f);
1438
1439 for (int speak = 0; speak < K; speak++) {
1440 Sxx += mdfInnerProd(st->x + speak * N + st->frame_size,
1441 st->x + speak * N + st->frame_size, st->frame_size);
1442 powerSpectrumAccum(st->X + speak * N, st->Xf, N);
1443 }
1444
1445 // Smooth far-end energy
1446 for (int j = 0; j <= st->frame_size; j++)
1447 st->power[j] = ss_1 * st->power[j] + 1 + ss * st->Xf[j];
1448
1449 // Compute filtered spectra and correlations
1450 echo_float_t Pey = FLOAT_ZERO, Pyy = FLOAT_ZERO;
1451 for (int j = st->frame_size; j >= 0; j--) {
1452 echo_float_t Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
1453 echo_float_t Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
1454 Pey = FLOAT_ADD(Pey, FLOAT_MULT(Eh, Yh));
1455 Pyy = FLOAT_ADD(Pyy, FLOAT_MULT(Yh, Yh));
1456 st->Eh[j] =
1457 (1 - st->spec_average) * st->Eh[j] + st->spec_average * st->Rf[j];
1458 st->Yh[j] =
1459 (1 - st->spec_average) * st->Yh[j] + st->spec_average * st->Yf[j];
1460 }
1461
1462 Pyy = FLOAT_SQRT(Pyy);
1463 Pey = FLOAT_DIVU(Pey, Pyy);
1464
1465 // Compute correlation update rate
1466 echo_word32_t tmp32 = st->beta0 * Syy;
1467 if (tmp32 > st->beta_max * See) tmp32 = st->beta_max * See;
1468 echo_float_t alpha = tmp32 / See;
1469 echo_float_t alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1470
1471 st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1, st->Pey), FLOAT_MULT(alpha, Pey));
1472 st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1, st->Pyy), FLOAT_MULT(alpha, Pyy));
1473 if (FLOAT_LT(st->Pyy, FLOAT_ONE)) st->Pyy = FLOAT_ONE;
1474 if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK, st->Pyy)))
1475 st->Pey = FLOAT_MULT(MIN_LEAK, st->Pyy);
1476 if (FLOAT_GT(st->Pey, st->Pyy)) st->Pey = st->Pyy;
1477
1478 st->leak_estimate =
1479 FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy), 14));
1480 if (st->leak_estimate > 16383)
1481 st->leak_estimate = 32767;
1482 else
1483 st->leak_estimate = st->leak_estimate * 2;
1484
1485 // Compute RER
1486 echo_word16_t RER;
1487 RER = (.0001f * Sxx + 3.f * st->leak_estimate * Syy) / See;
1488 if (RER < Sey * Sey / (1 + See * Syy)) RER = Sey * Sey / (1 + See * Syy);
1489 if (RER > .5f) RER = .5f;
1490
1491 if (!st->adapted && st->sum_adapt > M &&
1492 st->leak_estimate * Syy > .03f * Syy) {
1493 st->adapted = 1;
1494 }
1495
1496 if (st->adapted) {
1497 for (int i = 0; i <= st->frame_size; i++) {
1498 echo_word32_t r = st->leak_estimate * st->Yf[i];
1499 echo_word32_t e = st->Rf[i] + 1;
1500 if (r > .5f * e) r = .5f * e;
1501 r = .7f * r + .3f * (RER * e);
1502 st->power_1[i] = r / (e * (st->power[i] + 10));
1503 }
1504 } else {
1505 echo_word16_t adapt_rate = 0;
1506 if (Sxx > N * 1000.0f) {
1507 tmp32 = .25f * Sxx;
1508 if (tmp32 > .25f * See) tmp32 = .25f * See;
1509 adapt_rate = tmp32 / See;
1510 }
1511 for (int i = 0; i <= st->frame_size; i++)
1512 st->power_1[i] = adapt_rate / (st->power[i] + 10);
1513 st->sum_adapt = st->sum_adapt + adapt_rate;
1514 }
1515
1516 std::memmove(st->last_y,
1517 &st->last_y[st->frame_size],
1518 st->frame_size * sizeof(echo_word16_t));
1519 if (st->adapted) {
1520 for (int i = 0; i < st->frame_size; i++)
1521 st->last_y[st->frame_size + i] = in[i] - out[i];
1522 }
1523 }
1524};
1525
1526// ============================================================================
1527// FFT Implementation
1528// ============================================================================
1537template <typename Allocator>
1538inline void* echo_fft_init(int size, AudioFFTBase* driver, const Allocator& alloc) {
1539 if (!driver) {
1540 return nullptr;
1541 }
1542
1543 // Configure FFT with the required size
1544 AudioFFTConfig cfg;
1545 cfg.length = size;
1546 cfg.rxtx_mode = TX_MODE; // We need both FFT and IFFT capabilities
1547
1548 if (!driver->begin(cfg)) {
1549 return nullptr;
1550 }
1551 return new fft_state<Allocator>(size, driver, alloc);
1552}
1553
1559template <typename Allocator = std::allocator<uint8_t>>
1560inline void echo_fft_destroy(void* table) {
1561 if (table) {
1562 auto* st = static_cast<fft_state<Allocator>*>(table);
1563 st->driver->end();
1564 delete st;
1565 }
1566}
1567
1576template <typename Allocator = std::allocator<uint8_t>>
1577inline void echo_fft(void* table, echo_word16_t* in,
1578 echo_word16_t* out) {
1579 auto* st = static_cast<fft_state<Allocator>*>(table);
1580 if (!st || !st->driver) return;
1581
1582 // Set input values
1583 for (int i = 0; i < st->N; i++) {
1584 st->driver->setValue(i, in[i] / (float)st->N);
1585 }
1586
1587 // Perform FFT
1588 st->driver->fft();
1589
1590 // Get output in packed format: out[0]=real[0], out[1]=real[1],
1591 // out[2]=img[1],
1592 // ...
1593 out[0] = st->driver->getValue(0); // DC component
1594 for (int i = 1; i < st->N - 1; i += 2) {
1595 int bin = (i + 1) / 2;
1596 float real, img;
1597 st->driver->getBin(bin, real, img);
1598 out[i] = real;
1599 out[i + 1] = img;
1600 }
1601 out[st->N - 1] = st->driver->getValue(st->N / 2); // Nyquist
1602}
1603
1612template <typename Allocator = std::allocator<uint8_t>>
1613inline void echo_ifft(void* table, echo_word16_t* in,
1614 echo_word16_t* out) {
1615 auto* st = static_cast<fft_state<Allocator>*>(table);
1616 if (!st || !st->driver || !st->driver->isReverseFFT()) return;
1617
1618 // Set bins from packed format
1619 st->driver->setBin(0, in[0], 0);
1620 for (int i = 1; i < st->N - 1; i += 2) {
1621 int bin = (i + 1) / 2;
1622 st->driver->setBin(bin, in[i], in[i + 1]);
1623 }
1624 st->driver->setBin(st->N / 2, in[st->N - 1], 0);
1625
1626 // Perform inverse FFT
1627 st->driver->rfft();
1628
1629 // Get output
1630 for (int i = 0; i < st->N; i++) {
1631 out[i] = st->driver->getValue(i);
1632 }
1633}
1634
1635} // namespace audio_tools
Memory allocateator which uses malloc.
Definition Allocator.h:23
Executes FFT using audio data privded by write() and/or an inverse FFT where the samples are made ava...
Definition AudioFFT.h:191
void end() override
Release the allocated memory.
Definition AudioFFT.h:285
AudioFFTConfig & config()
Provides the actual configuration.
Definition AudioFFT.h:639
FFTDriver * driver()
Definition AudioFFT.h:551
bool setBin(int idx, float real, float img)
sets the value of a bin
Definition AudioFFT.h:618
bool begin(AudioFFTConfig info)
starts the processing
Definition AudioFFT.h:207
Base class for all Audio Streams. It support the boolean operator to test if the object is ready with...
Definition BaseStream.h:122
virtual void fft()=0
Perform FFT.
Acoustic echo canceller using MDF algorithm.
Definition MDFEchoCancellation.h:508
void spectralMulAccum(const echo_word16_t *X, const echo_word32_t *Y, echo_word16_t *acc, int N, int M)
Accumulate spectral multiplication across multiple frames.
Definition MDFEchoCancellation.h:971
int getMicChannels()
Definition MDFEchoCancellation.h:759
void echo_ifft(void *table, echo_word16_t *in, echo_word16_t *out)
Perform inverse FFT.
Definition MDFEchoCancellation.h:1613
void cancel(const echo_int16_t *rec, const echo_int16_t *play, echo_int16_t *out)
Definition MDFEchoCancellation.h:575
int getSamplingRate()
Definition MDFEchoCancellation.h:721
void mdfAdjustProp(const echo_word32_t *W, int N, int M, int P, echo_word16_t *prop)
Adjust proportional adaptation weights.
Definition MDFEchoCancellation.h:1020
void echo_fft(void *table, echo_word16_t *in, echo_word16_t *out)
Perform forward FFT.
Definition MDFEchoCancellation.h:1577
std::vector< float, FloatAllocator > temp_real
Definition MDFEchoCancellation.h:456
~MDFEchoCancellation()
Definition MDFEchoCancellation.h:532
void getImpulseResponse(echo_int32_t *response)
Definition MDFEchoCancellation.h:729
int K
Definition MDFEchoCancellation.h:388
echo_word32_t mdfInnerProd(const echo_word16_t *x, const echo_word16_t *y, int len)
Compute inner product of two vectors.
Definition MDFEchoCancellation.h:925
void setSpeakerChannels(int num)
Definition MDFEchoCancellation.h:764
AudioFFTBase * fft_driver
Definition MDFEchoCancellation.h:791
void setMicChannels(int num)
Definition MDFEchoCancellation.h:750
AudioFFTBase * driver
Definition MDFEchoCancellation.h:454
void * echo_fft_init(int size, AudioFFTBase *driver, const Allocator &alloc)
Initialize FFT state.
Definition MDFEchoCancellation.h:1538
void filterDcNotch16(const echo_int16_t *in, echo_word16_t radius, echo_word16_t *out, int len, echo_mem_t *mem, int stride)
Apply DC notch filter to remove DC offset.
Definition MDFEchoCancellation.h:905
fft_state(int size, AudioFFTBase *drv, const Allocator &alloc=Allocator())
Construct FFT state with specified size and driver.
Definition MDFEchoCancellation.h:465
void echoFree(T *ptr, size_t count=0)
Deallocate memory using custom allocator.
Definition MDFEchoCancellation.h:839
echo_int32_t sampling_rate
Definition MDFEchoCancellation.h:389
int control(int request, void *ptr)
Definition MDFEchoCancellation.h:675
MDFEchoCancellation(int filterLength, AudioFFTBase &fftDriver, const Allocator &alloc=Allocator())
Definition MDFEchoCancellation.h:515
int N
Definition MDFEchoCancellation.h:455
int nb_speakers
Definition MDFEchoCancellation.h:795
int getFilterLength()
Definition MDFEchoCancellation.h:745
void ensureInitialized()
Ensure echo canceller is initialized (lazy initialization)
Definition MDFEchoCancellation.h:801
void echo_fft_destroy(void *table)
Destroy FFT state and release resources.
Definition MDFEchoCancellation.h:1560
void setFilterLength(int len)
Definition MDFEchoCancellation.h:736
void playback(const echo_int16_t *play)
Definition MDFEchoCancellation.h:607
int getImpulseResponseSize()
Definition MDFEchoCancellation.h:724
int getFrameSize()
Definition MDFEchoCancellation.h:713
void powerSpectrum(const echo_word16_t *X, echo_word32_t *ps, int N)
Compute power spectrum from FFT output.
Definition MDFEchoCancellation.h:940
void setSamplingRate(int rate)
Definition MDFEchoCancellation.h:718
EchoState * getState()
Definition MDFEchoCancellation.h:787
EchoState * state
Definition MDFEchoCancellation.h:790
T * echoAlloc(size_t count)
Allocate memory for array of type T using custom allocator.
Definition MDFEchoCancellation.h:821
void powerSpectrumAccum(const echo_word16_t *X, echo_word32_t *ps, int N)
Accumulate power spectrum from FFT output.
Definition MDFEchoCancellation.h:954
std::vector< float, FloatAllocator > temp_img
Definition MDFEchoCancellation.h:457
void capture(const echo_int16_t *rec, echo_int16_t *out)
Definition MDFEchoCancellation.h:585
int nb_mic
Definition MDFEchoCancellation.h:794
typename std::allocator_traits< Allocator >::template rebind_alloc< float > FloatAllocator
Definition MDFEchoCancellation.h:452
void setFFTDriver(AudioFFTBase &fftDriver)
Definition MDFEchoCancellation.h:778
void reset()
Definition MDFEchoCancellation.h:629
int frame_size
Definition MDFEchoCancellation.h:380
MDFEchoCancellation(int filterLength, int nbMic, int nbSpeakers, AudioFFTBase &fftDriver, const Allocator &alloc=Allocator())
Definition MDFEchoCancellation.h:526
float spxExp(float x)
Compute exponential function.
Definition MDFEchoCancellation.h:887
void weightedSpectralMulConj(const echo_float_t *w, const echo_float_t p, const echo_word16_t *X, const echo_word16_t *Y, echo_word32_t *prod, int N)
Compute weighted spectral multiplication with conjugate.
Definition MDFEchoCancellation.h:996
bool initialized
Definition MDFEchoCancellation.h:796
int getSpeakerChannels()
Definition MDFEchoCancellation.h:773
float spxCos(float x)
Compute cosine function.
Definition MDFEchoCancellation.h:894
EchoState * echoStateInitMc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
Initialize multi-channel echo canceller state.
Definition MDFEchoCancellation.h:1055
void echoCancellationImpl(EchoState *st, const echo_int16_t *in, const echo_int16_t *far_end, echo_int16_t *out)
Core echo cancellation implementation.
Definition MDFEchoCancellation.h:1179
Allocator allocator
Definition MDFEchoCancellation.h:792
int filter_length
Definition MDFEchoCancellation.h:793
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition AudioCodecsBase.h:10
static const echo_float_t VAR1_UPDATE
Definition MDFEchoCancellation.h:326
static const echo_float_t VAR2_SMOOTH
Definition MDFEchoCancellation.h:323
static const echo_float_t VAR2_UPDATE
Definition MDFEchoCancellation.h:329
static const echo_float_t MIN_LEAK
Definition MDFEchoCancellation.h:317
static const echo_float_t VAR1_SMOOTH
Definition MDFEchoCancellation.h:320
static const echo_float_t VAR_BACKTRACK
Definition MDFEchoCancellation.h:332
Configuration for AudioFFT. If there are more then 1 channel the channel_used is defining which chann...
Definition AudioFFT.h:40
RxTxMode rxtx_mode
TX_MODE = FFT, RX_MODE = IFFT.
Definition AudioFFT.h:59
Internal echo canceller state structure.
Definition MDFEchoCancellation.h:379
Definition MDFEchoCancellation.h:173
FFT state management structure with custom allocator support.
Definition MDFEchoCancellation.h:451