arduino-audio-tools
Loading...
Searching...
No Matches
WakeWordDetector.h
Go to the documentation of this file.
1#pragma once
2
3#include <algorithm>
4#include <cmath>
5
10
11namespace audio_tools {
12
13/*
14 * @brief Frame holding the indices of the top N frequencies in an FFT window.
15 *
16 * Used as a compact representation of the dominant frequency content in a frame
17 * of audio.
18 */
19template <size_t N>
23
50template <typename T = int16_t, size_t N = 3>
52 public:
53 struct Template {
58 const char* name;
60 0.0f;
61 };
62
63 using WakeWordCallback = void (*)(const char* name);
64
66 : p_fft(&fft) {
67 _frame_pos = 0;
68 auto& fft_cfg = fft.config();
69 fft_cfg.ref = this;
70 fft_cfg.callback = fftResult;
71 }
72
74 _recent_frames.clear();
75 _is_recording = true;
76 }
77
82
83 bool isRecording() const { return _is_recording; }
84
86 float threshold_percent, const char* name) {
87 Template t;
88 t.frames = frames;
89 t.threshold_percent = threshold_percent;
90 t.name = name;
91 t.last_match_percent = 0.0f;
92 _templates.push_back(t);
93 if (frames.size() > _max_template_len) _max_template_len = frames.size();
94 }
95
97
98 size_t write(const uint8_t* buf, size_t size) override {
99 return p_fft->write(buf, size);
100 }
101
102 static void fftResult(AudioFFTBase& fft) {
103 // This static method must access instance data via fft.config().ref
104 auto* self = static_cast<WakeWordDetector<T,N>*>(fft.config().ref);
105 if (!self) return;
106 FrequencyFrame<N> frame;
107 AudioFFTResult result[N];
108 fft.resultArray(result);
109 for (size_t j = 0; j < N; j++) {
110 frame.top_freqs[j] = result[j].frequency;
111 }
112 self->_recent_frames.push_back(frame);
113
114 if (self->_is_recording) {
115 return;
116 }
117
118 if (self->_recent_frames.size() > self->_max_template_len)
119 self->_recent_frames.erase(self->_recent_frames.begin());
120 for (size_t i = 0; i < self->_templates.size(); ++i) {
121 Template& tmpl = self->_templates[i];
122 if (self->_recent_frames.size() >= tmpl.frames.size()) {
123 float percent = self->matchTemplate(tmpl);
124 if (percent >= tmpl.threshold_percent) {
125 if (self->_callback) self->_callback(tmpl.name);
126 }
127 }
128 }
129 }
130
131 protected:
135 AudioFFTBase* p_fft = nullptr;
136 bool _is_recording = false;
137 size_t _frame_pos;
138 size_t _max_template_len = 0;
140
142 size_t matches = 0;
143 size_t offset = _recent_frames.size() - tmpl.frames.size();
144 for (size_t i = 0; i < tmpl.frames.size(); ++i) {
145 size_t frame_matches = 0;
146 for (size_t j = 0; j < N; ++j) {
147 if (tmpl.frames[i].top_freqs[j] ==
148 _recent_frames[offset + i].top_freqs[j])
150 }
151 if (frame_matches >= (N >= 2 ? N - 1 : 1)) // at least N-1 out of N match
152 matches++;
153 }
154 float percent = (tmpl.frames.size() > 0)
155 ? (100.0f * matches / tmpl.frames.size())
156 : 0.0f;
157 tmpl.last_match_percent = percent;
158 return percent;
159 }
160};
161
162} // namespace audio_tools
Executes FFT using audio data provided by write() and/or an inverse FFT where the samples are made ava...
Definition AudioFFT.h:191
AudioFFTConfig & config()
Provides the actual configuration.
Definition AudioFFT.h:639
size_t write(const uint8_t *data, size_t len) override
Provide the audio data as FFT input.
Definition AudioFFT.h:294
void resultArray(AudioFFTResult(&result)[N])
Determines the N biggest result values.
Definition AudioFFT.h:379
Abstract Audio Output class.
Definition AudioOutput.h:25
Vector implementation which provides the most important methods as defined by std::vector....
Definition Vector.h:21
Template-based wake word detector for microcontrollers using dominant frequency patterns.
Definition WakeWordDetector.h:51
static void fftResult(AudioFFTBase &fft)
Definition WakeWordDetector.h:102
void(*)(const char *name) WakeWordCallback
Definition WakeWordDetector.h:63
Vector< FrequencyFrame< N > > stopRecording()
Definition WakeWordDetector.h:78
bool isRecording() const
Definition WakeWordDetector.h:83
void addTemplate(const Vector< FrequencyFrame< N > > &frames, float threshold_percent, const char *name)
Definition WakeWordDetector.h:85
bool _is_recording
True if currently recording a template.
Definition WakeWordDetector.h:136
size_t _max_template_len
Length of the longest template.
Definition WakeWordDetector.h:138
size_t _frame_pos
Current position in frame buffer.
Definition WakeWordDetector.h:137
AudioFFTBase * p_fft
Definition WakeWordDetector.h:135
Vector< Template > _templates
List of wake word templates.
Definition WakeWordDetector.h:132
float matchTemplate(Template &tmpl)
Definition WakeWordDetector.h:141
void setWakeWordCallback(WakeWordCallback cb)
Definition WakeWordDetector.h:96
void startRecording()
Definition WakeWordDetector.h:73
Vector< FrequencyFrame< N > > _recent_frames
Recent frames for comparison.
Definition WakeWordDetector.h:133
WakeWordCallback _callback
Definition WakeWordDetector.h:139
WakeWordDetector(AudioFFTBase &fft)
Definition WakeWordDetector.h:65
Vector< T > _buffer
Buffer for incoming PCM samples.
Definition WakeWordDetector.h:134
size_t write(const uint8_t *buf, size_t size) override
Definition WakeWordDetector.h:98
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition AudioCodecsBase.h:10
size_t writeData(Print *p_out, T *data, int samples, int maxSamples=512)
Definition AudioTypes.h:512
void * ref
caller
Definition AudioFFT.h:61
Result of the FFT.
Definition AudioFFT.h:23
float frequency
Definition AudioFFT.h:26
Definition WakeWordDetector.h:20
uint16_t top_freqs[N]
Indices of the top N frequencies in the FFT.
Definition WakeWordDetector.h:21
Definition WakeWordDetector.h:53
const char * name
Name/label of the wake word.
Definition WakeWordDetector.h:58
Vector< FrequencyFrame< N > > frames
Sequence of frequency frames for the wake word.
Definition WakeWordDetector.h:55
float last_match_percent
Last computed match percent for this template.
Definition WakeWordDetector.h:59
float threshold_percent
Definition WakeWordDetector.h:56