arduino-audio-tools
Loading...
Searching...
No Matches
WakeWordDetector.h
1#pragma once
2
3#include <algorithm>
4#include <cmath>
5
6#include "AudioTools/CoreAudio/AudioOutput.h"
7#include "AudioTools/CoreAudio/AudioBasic/Collections/Vector.h"
8#include "AudioTools/CoreAudio/Buffers.h"
9#include "AudioTools/AudioLibs/AudioFFT.h"
10
11namespace audio_tools {
12
13/*
14 * @brief Frame holding the indices of the top N frequencies in an FFT window.
15 *
16 * Used as a compact representation of the dominant frequency content in a frame
17 * of audio.
18 */
19template <size_t N>
21 uint16_t top_freqs[N];
22};
23
50template <typename T = int16_t, size_t N = 3>
52 public:
53 struct Template {
58 const char* name;
60 0.0f;
61 };
62
63 using WakeWordCallback = void (*)(const char* name);
64
66 : p_fft(&fft) {
67 _frame_pos = 0;
68 auto& fft_cfg = fft.config();
69 fft_cfg.ref = this;
70 fft_cfg.callback = fftResult;
71 }
72
73 void startRecording() {
74 _recent_frames.clear();
75 _is_recording = true;
76 }
77
78 Vector<FrequencyFrame<N>> stopRecording() {
79 _is_recording = false;
80 return _recent_frames;
81 }
82
83 bool isRecording() const { return _is_recording; }
84
85 void addTemplate(const Vector<FrequencyFrame<N>>& frames,
86 float threshold_percent, const char* name) {
87 Template t;
88 t.frames = frames;
89 t.threshold_percent = threshold_percent;
90 t.name = name;
91 t.last_match_percent = 0.0f;
92 _templates.push_back(t);
93 if (frames.size() > _max_template_len) _max_template_len = frames.size();
94 }
95
96 void setWakeWordCallback(WakeWordCallback cb) { _callback = cb; }
97
98 size_t write(const uint8_t* buf, size_t size) override {
99 return p_fft->write(buf, size);
100 }
101
102 static void fftResult(AudioFFTBase& fft) {
103 // This static method must access instance data via fft.config().ref
104 auto* self = static_cast<WakeWordDetector<T,N>*>(fft.config().ref);
105 if (!self) return;
106 FrequencyFrame<N> frame;
107 AudioFFTResult result[N];
108 fft.resultArray(result);
109 for (size_t j = 0; j < N; j++) {
110 frame.top_freqs[j] = result[j].frequency;
111 }
112 self->_recent_frames.push_back(frame);
113
114 if (self->_is_recording) {
115 return;
116 }
117
118 if (self->_recent_frames.size() > self->_max_template_len)
119 self->_recent_frames.erase(self->_recent_frames.begin());
120 for (size_t i = 0; i < self->_templates.size(); ++i) {
121 Template& tmpl = self->_templates[i];
122 if (self->_recent_frames.size() >= tmpl.frames.size()) {
123 float percent = self->matchTemplate(tmpl);
124 if (percent >= tmpl.threshold_percent) {
125 if (self->_callback) self->_callback(tmpl.name);
126 }
127 }
128 }
129 }
130
131 protected:
135 AudioFFTBase* p_fft = nullptr;
136 bool _is_recording = false;
137 size_t _frame_pos;
138 size_t _max_template_len = 0;
139 WakeWordCallback _callback = nullptr;
140
141 float matchTemplate(Template& tmpl) {
142 size_t matches = 0;
143 size_t offset = _recent_frames.size() - tmpl.frames.size();
144 for (size_t i = 0; i < tmpl.frames.size(); ++i) {
145 size_t frame_matches = 0;
146 for (size_t j = 0; j < N; ++j) {
147 if (tmpl.frames[i].top_freqs[j] ==
148 _recent_frames[offset + i].top_freqs[j])
149 frame_matches++;
150 }
151 if (frame_matches >= (N >= 2 ? N - 1 : 1)) // at least N-1 out of N match
152 matches++;
153 }
154 float percent = (tmpl.frames.size() > 0)
155 ? (100.0f * matches / tmpl.frames.size())
156 : 0.0f;
157 tmpl.last_match_percent = percent;
158 return percent;
159 }
160};
161
162} // namespace audio_tools
Executes FFT using audio data provided by write() and/or an inverse FFT where the samples are made ava...
Definition AudioFFT.h:191
AudioFFTConfig & config()
Provides the actual configuration.
Definition AudioFFT.h:639
size_t write(const uint8_t *data, size_t len) override
Provide the audio data as FFT input.
Definition AudioFFT.h:294
Abstract Audio Output class.
Definition AudioOutput.h:25
Vector implementation which provides the most important methods as defined by std::vector....
Definition Vector.h:21
Template-based wake word detector for microcontrollers using dominant frequency patterns.
Definition WakeWordDetector.h:51
bool _is_recording
True if currently recording a template.
Definition WakeWordDetector.h:136
size_t _max_template_len
Length of the longest template.
Definition WakeWordDetector.h:138
size_t _frame_pos
Current position in frame buffer.
Definition WakeWordDetector.h:137
Vector< Template > _templates
List of wake word templates.
Definition WakeWordDetector.h:132
Vector< FrequencyFrame< N > > _recent_frames
Recent frames for comparison.
Definition WakeWordDetector.h:133
Vector< T > _buffer
Buffer for incoming PCM samples.
Definition WakeWordDetector.h:134
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition AudioCodecsBase.h:10
void * ref
caller
Definition AudioFFT.h:61
Definition WakeWordDetector.h:20
uint16_t top_freqs[N]
Indices of top 3 frequencies in FFT.
Definition WakeWordDetector.h:21
Definition WakeWordDetector.h:53
const char * name
Name/label of the wake word.
Definition WakeWordDetector.h:58
Vector< FrequencyFrame< N > > frames
Sequence of frequency frames for the wake word.
Definition WakeWordDetector.h:55
float last_match_percent
Last computed match percent for this template.
Definition WakeWordDetector.h:59
float threshold_percent
Definition WakeWordDetector.h:56