arduino-audio-tools
TfLiteAudioStream.h
1 #pragma once
2 
3 // Configure FFT to output 16 bit fixed point.
4 #define FIXED_POINT 16
5 
6 //#include <MicroTFLite.h>
7 #include <TensorFlowLite.h>
8 #include <cmath>
9 #include <cstdint>
10 #include "AudioTools/CoreAudio/AudioOutput.h"
11 #include "AudioTools/CoreAudio/Buffers.h"
12 #include "tensorflow/lite/c/common.h"
13 #include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
14 #include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"
15 #include "tensorflow/lite/micro/all_ops_resolver.h"
16 #include "tensorflow/lite/micro/kernels/micro_ops.h"
17 #include "tensorflow/lite/micro/micro_interpreter.h"
18 #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
19 #include "tensorflow/lite/micro/system_setup.h"
20 #include "tensorflow/lite/schema/schema_generated.h"
21 
29 namespace audio_tools {
30 
31 // Forward Declarations
32 class TfLiteAudioStreamBase;
33 class TfLiteAbstractRecognizeCommands;
34 
/// Input class which provides the next values if the TfLiteAudioStream is
/// treated as an audio source (abstract interface).
class TfLiteReader {
 public:
  /// Called once before reading; parent gives access to the interpreter and config.
  virtual bool begin(TfLiteAudioStreamBase *parent) = 0;
  /// Fills data with up to len samples; returns the number of samples provided.
  virtual int read(int16_t*data, int len) = 0;
};
46 
/// Output class which interprets audio data if the TfLiteAudioStream is
/// treated as an audio sink (abstract interface).
class TfLiteWriter {
 public:
  /// Called once before writing; parent gives access to the interpreter and config.
  virtual bool begin(TfLiteAudioStreamBase *parent) = 0;
  /// Processes a single audio sample.
  virtual bool write(const int16_t sample) = 0;
};
58 
/// Configuration settings for TfLiteAudioStream.
struct TfLiteConfig {
  // Flatbuffer model data (required)
  const unsigned char* model = nullptr;
  // Optional audio source used when the stream is read from
  TfLiteReader *reader = nullptr;
  // Optional audio sink used when the stream is written to
  TfLiteWriter *writer = nullptr;
  // Optional command decoder; a default is installed when left null
  TfLiteAbstractRecognizeCommands *recognizeCommands=nullptr;
  // Use the (larger) AllOpsResolver instead of a minimal op resolver
  bool useAllOpsResolver = false;
  // callback for command handler
  void (*respondToCommand)(const char* found_command, uint8_t score,
                           bool is_new_command) = nullptr;

  // Create an area of memory to use for input, output, and intermediate arrays.
  // The size of this will depend on the model you're using, and may need to be
  // determined by experimentation.
  size_t kTensorArenaSize = 10 * 1024;

  // Keeping these as constant expressions allow us to allocate fixed-sized
  // arrays on the stack for our working memory.

  // The size of the input time series data we pass to the FFT to produce
  // the frequency information. This has to be a power of two, and since
  // we're dealing with 30ms of 16KHz inputs, which means 480 samples, this
  // is the next value.
  // int kMaxAudioSampleSize = 320; //512; // 480
  int sample_rate = 16000;

  // Number of audio channels - is usually 1. If 2 we reduce it to 1 by
  // averaging the 2 channels
  int channels = 1;

  // The following values are derived from values used during model training.
  // If you change the way you preprocess the input, update all these constants.
  int kFeatureSliceSize = 40;
  int kFeatureSliceCount = 49;
  int kFeatureSliceStrideMs = 20;
  int kFeatureSliceDurationMs = 30;

  // number of new slices to collect before evaluating the model
  int kSlicesToProcess = 2;

  // Parameters for RecognizeCommands
  int32_t average_window_duration_ms = 1000;
  uint8_t detection_threshold = 50;
  int32_t suppression_ms = 1500;
  int32_t minimum_count = 3;

  // input for FrontendConfig
  float filterbank_lower_band_limit = 125.0;
  float filterbank_upper_band_limit = 7500.0;
  float noise_reduction_smoothing_bits = 10;
  float noise_reduction_even_smoothing = 0.025;
  float noise_reduction_odd_smoothing = 0.06;
  float noise_reduction_min_signal_remaining = 0.05;
  bool pcan_gain_control_enable_pcan = 1;
  float pcan_gain_control_strength = 0.95;
  float pcan_gain_control_offset = 80.0;
  float pcan_gain_control_gain_bits = 21;
  bool log_scale_enable_log = 1;
  uint8_t log_scale_scale_shift = 6;

  /// Defines the labels; the array length determines the category count.
  template<int N>
  void setCategories(const char* (&array)[N]){
    labels = array;
    kCategoryCount = N;
  }

  /// Determines the number of categories (labels)
  int categoryCount() {
    return kCategoryCount;
  }

  /// Number of feature elements fed to the model (slices x slice size)
  int featureElementCount() {
    return kFeatureSliceSize * kFeatureSliceCount;
  }

  /// Samples per feature window (duration in ms x samples per ms)
  int audioSampleSize() {
    return kFeatureSliceDurationMs * (sample_rate / 1000);
  }

  /// Samples per stride (stride in ms x samples per ms)
  int strideSampleSize() {
    return kFeatureSliceStrideMs * (sample_rate / 1000);
  }

 private:
  int kCategoryCount = 0;
  // NOTE(review): labels is accessed elsewhere in this file as cfg.labels;
  // verify it is really meant to be private here (extraction may have
  // dropped an access specifier).
  const char** labels = nullptr;
};
153 
161  public:
162  // convert float to int8
163  static int8_t quantize(float value, float scale, float zero_point){
164  if(scale==0.0&&zero_point==0) return value;
165  return value / scale + zero_point;
166  }
167  // convert int8 to float
168  static float dequantize(int8_t value, float scale, float zero_point){
169  if(scale==0.0&&zero_point==0) return value;
170  return (value - zero_point) * scale;
171  }
172 
173  static float dequantizeToNewRange(int8_t value, float scale, float zero_point, float new_range){
174  float deq = (static_cast<float>(value) - zero_point) * scale;
175  return clip(deq * new_range, new_range);
176  }
177 
178  static float clip(float value, float range){
179  if (value>=0.0){
180  return value > range ? range : value;
181  } else {
182  return -value < -range ? -range : value;
183  }
184  }
185 };
186 
 public:
  /// Setup parameters from config.
  virtual bool begin(TfLiteConfig cfg) = 0;
  /// Interprets the latest model output: reports the recognized command, its
  /// score and whether it was newly detected.
  virtual TfLiteStatus getCommand(const TfLiteTensor* latest_results, const int32_t current_time_ms,
      const char** found_command,uint8_t* score,bool* is_new_command) = 0;

};
200 
215  public:
216 
218  }
219 
221  bool begin(TfLiteConfig cfg) override {
222  TRACED();
223  this->cfg = cfg;
224  if (cfg.labels == nullptr) {
225  LOGE("config.labels not defined");
226  return false;
227  }
228  return true;
229  }
230 
231  // Call this with the results of running a model on sample data.
  // Call this with the results of running a model on sample data.
  // Queues the best category of the latest inference and averages the queued
  // scores over the configured window to decide on a command.
  virtual TfLiteStatus getCommand(const TfLiteTensor* latest_results,
                                  const int32_t current_time_ms,
                                  const char** found_command,
                                  uint8_t* score,
                                  bool* is_new_command) override {

    TRACED();
    // track timing relative to the previously reported command
    this->current_time_ms = current_time_ms;
    this->time_since_last_top = current_time_ms - previous_time_ms;

    // drop results that fell out of the averaging window
    deleteOldRecords(current_time_ms - cfg.average_window_duration_ms);
    // NOTE(review): resultCategoryIdx() can return -1; indexing
    // data.int8[-1] below would be out of bounds - verify this cannot happen.
    int idx = resultCategoryIdx(latest_results->data.int8);
    Result row(current_time_ms, idx, latest_results->data.int8[idx]);
    result_queue.push_back(row);

    // sanity-check the tensor before averaging the queued results
    TfLiteStatus result = validate(latest_results);
    if (result!=kTfLiteOk){
      return result;
    }
    return evaluate(found_command, score, is_new_command);
  }
253 
254  protected:
255  struct Result {
256  int32_t time_ms;
257  int category=0;
258  int8_t score=0;
259 
260  Result() = default;
261  Result(int32_t time_ms,int category, int8_t score){
262  this->time_ms = time_ms;
263  this->category = category;
264  this->score = score;
265  }
266  };
267 
268  TfLiteConfig cfg;
269  Vector <Result> result_queue;
270  int previous_cateogory=-1;
271  int32_t current_time_ms=0;
272  int32_t previous_time_ms=0;
273  int32_t time_since_last_top=0;
274 
276  int resultCategoryIdx(int8_t* score) {
277  int result = -1;
278  uint8_t top_score = std::numeric_limits<uint8_t>::min();
279  for (int j=0;j<categoryCount();j++){
280  if (score[j]>top_score){
281  result = j;
282  }
283  }
284  return result;
285  }
286 
289  return cfg.categoryCount();
290  }
291 
293  void deleteOldRecords(int32_t limit) {
294  if (result_queue.empty()) return;
295  while (result_queue[0].time_ms<limit){
296  result_queue.pop_front();
297  }
298  }
299 
  /// Finds the result: averages the queued scores per category over the
  /// current window and reports the best category; sets is_new_command when
  /// the winner changed, is confident enough and outside the suppression window.
  TfLiteStatus evaluate(const char** found_command, uint8_t* result_score, bool* is_new_command) {
    TRACED();
    // NOTE(review): variable-length arrays are a compiler extension in C++
    float totals[categoryCount()]={0};
    int count[categoryCount()]={0};
    // calculate totals
    for (int j=0;j<result_queue.size();j++){
      int idx = result_queue[j].category;
      totals[idx] += result_queue[j].score;
      count[idx]++;
    }

    // find max
    int maxIdx = -1;
    float max = -100000;
    for (int j=0;j<categoryCount();j++){
      if (totals[j]>max){
        max = totals[j];
        maxIdx = j;
      }
    }

    if (maxIdx==-1){
      LOGE("Could not find max category")
      return kTfLiteError;
    }

    // determine result: average score of the winning category
    *result_score = totals[maxIdx] / count[maxIdx];
    *found_command = cfg.labels[maxIdx];

    // report a new command only if it differs from the last reported one,
    // exceeds the threshold and enough time has passed since the last report
    if (previous_cateogory!=maxIdx
        && *result_score > cfg.detection_threshold
        && time_since_last_top > cfg.suppression_ms){
      previous_time_ms = current_time_ms;
      previous_cateogory = maxIdx;
      *is_new_command = true;
    } else {
      *is_new_command = false;
    }

    LOGD("Category: %s, score: %d, is_new: %d",*found_command, *result_score, *is_new_command);

    return kTfLiteOk;
  }
345 
  /// Checks the input data: the tensor must be int8 with shape
  /// [1, categoryCount()] and timestamps must be non-decreasing.
  TfLiteStatus validate(const TfLiteTensor* latest_results) {
    // shape check: [1, categoryCount()]
    if ((latest_results->dims->size != 2) ||
        (latest_results->dims->data[0] != 1) ||
        (latest_results->dims->data[1] != categoryCount())) {
      LOGE(
          "The results for recognition should contain %d "
          "elements, but there are "
          "%d in an %d-dimensional shape",
          categoryCount(), (int)latest_results->dims->data[1],
          (int)latest_results->dims->size);
      return kTfLiteError;
    }

    // type check: the recognizer only understands int8 output
    if (latest_results->type != kTfLiteInt8) {
      LOGE("The results for recognition should be int8 elements, but are %d",
           (int)latest_results->type);
      return kTfLiteError;
    }

    // timestamps must be in increasing order relative to the queue
    if ((!result_queue.empty()) &&
        (current_time_ms < result_queue[0].time_ms)) {
      LOGE("Results must be in increasing time order: timestamp %d < %d",
           (int)current_time_ms, (int)result_queue[0].time_ms);
      return kTfLiteError;
    }
    return kTfLiteOk;
  }
374 
375 };
376 
377 
 public:
  /// Optionally define your own interpreter.
  virtual void setInterpreter(tflite::MicroInterpreter* p_interpreter) = 0;
  /// Provides the default configuration.
  virtual TfLiteConfig defaultConfig() = 0;
  /// Starts the processing.
  virtual bool begin(TfLiteConfig config) = 0;
  /// Number of bytes that can currently be written.
  virtual int availableToWrite() = 0;

  /// Process the data in batches of max kMaxAudioSampleSize.
  virtual size_t write(const uint8_t* data, size_t len)= 0;
  /// Provides the tf lite interpreter.
  virtual tflite::MicroInterpreter& interpreter()= 0;

  /// Provides the TfLiteConfig information.
  virtual TfLiteConfig &config()= 0;

  /// Provides access to the model input buffer.
  virtual int8_t* modelInputBuffer()= 0;
};
402 
410  public:
411  TfLiteMicroSpeachWriter() = default;
412 
414  if (p_buffer != nullptr) delete p_buffer;
415  if (p_audio_samples != nullptr) delete p_audio_samples;
416  }
417 
  /// Call begin before starting the processing: caches the configuration,
  /// sets up the recognizer and the audio frontend, and allocates the ring
  /// buffer, feature matrix and sample buffer.
  virtual bool begin(TfLiteAudioStreamBase *parent) {
    TRACED();
    this->parent = parent;
    cfg = parent->config();
    current_time = 0;
    // samples per feature window / per stride; the difference is the overlap
    // that is kept between consecutive slices
    kMaxAudioSampleSize = cfg.audioSampleSize();
    kStrideSampleSize = cfg.strideSampleSize();
    kKeepSampleSize = kMaxAudioSampleSize - kStrideSampleSize;

    if (!setup_recognizer()) {
      LOGE("setup_recognizer");
      return false;
    }

    // setup FrontendConfig
    TfLiteStatus init_status = initializeMicroFeatures();
    if (init_status != kTfLiteOk) {
      return false;
    }

    // Allocate ring buffer (only on the first call)
    if (p_buffer == nullptr) {
      p_buffer = new audio_tools::RingBuffer<int16_t>(kMaxAudioSampleSize);
      LOGD("Allocating buffer for %d samples", kMaxAudioSampleSize);
    }

    // Initialize the feature data to default values.
    if (p_feature_data == nullptr) {
      p_feature_data = new int8_t[cfg.featureElementCount()];
      memset(p_feature_data, 0, cfg.featureElementCount());
    }

    // allocate p_audio_samples
    if (p_audio_samples == nullptr) {
      p_audio_samples = new int16_t[kMaxAudioSampleSize];
      memset(p_audio_samples, 0, kMaxAudioSampleSize * sizeof(int16_t));
    }

    return true;
  }
459 
  /// Processes a single sample: buffers it until a full window is collected
  /// (write1 returns false), then extracts a new feature slice and - every
  /// kSlicesToProcess slices - runs the model.
  virtual bool write(int16_t sample) {
    TRACED();
    if (!write1(sample)){
      // buffer is full: one stride worth of new audio was collected
      // determine time
      current_time += cfg.kFeatureSliceStrideMs;
      // determine slice
      total_slice_count++;

      int8_t* feature_buffer = addSlice();
      if (total_slice_count >= cfg.kSlicesToProcess) {
        processSlices(feature_buffer);
        // reset total_slice_count
        total_slice_count = 0;
      }
    }
    return true;
  }
477 
478  protected:
479  TfLiteConfig cfg;
480  TfLiteAudioStreamBase *parent=nullptr;
481  int8_t* p_feature_data = nullptr;
482  int16_t* p_audio_samples = nullptr;
483  audio_tools::RingBuffer<int16_t>* p_buffer = nullptr;
484  FrontendState g_micro_features_state;
485  FrontendConfig config;
486  int kMaxAudioSampleSize;
487  int kStrideSampleSize;
488  int kKeepSampleSize;
489  int16_t last_value;
490  int8_t channel = 0;
491  int32_t current_time = 0;
492  int16_t total_slice_count = 0;
493 
  /// Installs the default recognizer when none was configured and starts it.
  virtual bool setup_recognizer() {
    // setup default p_recognizer if not defined
    if (cfg.recognizeCommands == nullptr) {
      // static so it outlives this writer (shared by all instances)
      static TfLiteMicroSpeechRecognizeCommands static_recognizer;
      cfg.recognizeCommands = &static_recognizer;
    }
    return cfg.recognizeCommands->begin(cfg);
  }
502 
504  virtual bool write1(const int16_t sample) {
505  if (cfg.channels == 1) {
506  p_buffer->write(sample);
507  } else {
508  if (channel == 0) {
509  last_value = sample;
510  channel = 1;
511  } else
512  // calculate avg of 2 channels and convert it to int8_t
513  p_buffer->write(((sample / 2) + (last_value / 2)));
514  channel = 0;
515  }
516  return p_buffer->availableForWrite() > 0;
517  }
518 
519  // If we can avoid recalculating some slices, just move the existing
520  // data up in the spectrogram, to perform something like this: last time
521  // = 80ms current time = 120ms
522  // +-----------+ +-----------+
523  // | data@20ms | --> | data@60ms |
524  // +-----------+ -- +-----------+
525  // | data@40ms | -- --> | data@80ms |
526  // +-----------+ -- -- +-----------+
527  // | data@60ms | -- -- | <empty> |
528  // +-----------+ -- +-----------+
529  // | data@80ms | -- | <empty> |
530  // +-----------+ +-----------+
  /// Extracts the next feature slice: shifts the spectrogram up by one
  /// slice, reads one full window of audio from the ring buffer, re-queues
  /// the overlapping samples and generates the new slice at the end.
  virtual int8_t* addSlice() {
    TRACED();
    // shift p_feature_data up by one slice
    memmove(p_feature_data, p_feature_data + cfg.kFeatureSliceSize,
            (cfg.kFeatureSliceCount - 1) * cfg.kFeatureSliceSize);

    // copy data from buffer to p_audio_samples
    int audio_samples_size =
        p_buffer->readArray(p_audio_samples, kMaxAudioSampleSize);

    // check size
    if (audio_samples_size != kMaxAudioSampleSize) {
      LOGE("audio_samples_size=%d != kMaxAudioSampleSize=%d",
           audio_samples_size, kMaxAudioSampleSize);
    }

    // keep some data to be reprocessed - move by kStrideSampleSize
    p_buffer->writeArray(p_audio_samples + kStrideSampleSize, kKeepSampleSize);

    // the new slice data will always be stored at the end
    int8_t* new_slice_data =
        p_feature_data + ((cfg.kFeatureSliceCount - 1) * cfg.kFeatureSliceSize);
    size_t num_samples_read = 0;
    if (generateMicroFeatures(p_audio_samples, audio_samples_size,
                              new_slice_data, cfg.kFeatureSliceSize,
                              &num_samples_read) != kTfLiteOk) {
      LOGE("Error generateMicroFeatures");
    }
    // printFeatures();
    return p_feature_data;
  }
562 
563  // Process multiple slice of audio data
  /// Processes the collected slices of audio data: runs the model on the
  /// current spectrogram and dispatches the recognized command.
  virtual bool processSlices(int8_t* feature_buffer) {
    LOGI("->slices: %d", total_slice_count);
    // Copy feature buffer to input tensor
    memcpy(parent->modelInputBuffer(), feature_buffer, cfg.featureElementCount());

    // Run the model on the spectrogram input and make sure it succeeds.
    TfLiteStatus invoke_status = parent->interpreter().Invoke();
    if (invoke_status != kTfLiteOk) {
      LOGE("Invoke failed");
      return false;
    }

    // Obtain a pointer to the output tensor
    TfLiteTensor* output = parent->interpreter().output(0);

    // Determine whether a command was recognized
    const char* found_command = nullptr;
    uint8_t score = 0;
    bool is_new_command = false;

    TfLiteStatus process_status = cfg.recognizeCommands->getCommand(
        output, current_time, &found_command, &score, &is_new_command);
    if (process_status != kTfLiteOk) {
      LOGE("TfLiteMicroSpeechRecognizeCommands::getCommand() failed");
      return false;
    }
    // Do something based on the recognized command. The default
    // implementation just prints to the error console, but you should replace
    // this with your own function for a real application.
    respondToCommand(found_command, score, is_new_command);
    return true;
  }
596 
598  void printFeatures() {
599  for (int i = 0; i < cfg.kFeatureSliceCount; i++) {
600  for (int j = 0; j < cfg.kFeatureSliceSize; j++) {
601  Serial.print(p_feature_data[(i * cfg.kFeatureSliceSize) + j]);
602  Serial.print(" ");
603  }
604  Serial.println();
605  }
606  Serial.println("------------");
607  }
608 
  /// Maps the TfLiteConfig frontend parameters to the microfrontend
  /// FrontendConfig and initializes the frontend state.
  virtual TfLiteStatus initializeMicroFeatures() {
    TRACED();
    // FFT window geometry
    config.window.size_ms = cfg.kFeatureSliceDurationMs;
    config.window.step_size_ms = cfg.kFeatureSliceStrideMs;
    // filterbank: one channel per feature element
    config.filterbank.num_channels = cfg.kFeatureSliceSize;
    config.filterbank.lower_band_limit = cfg.filterbank_lower_band_limit;
    config.filterbank.upper_band_limit = cfg.filterbank_upper_band_limit;
    // noise reduction
    config.noise_reduction.smoothing_bits = cfg.noise_reduction_smoothing_bits;
    config.noise_reduction.even_smoothing = cfg.noise_reduction_even_smoothing;
    config.noise_reduction.odd_smoothing = cfg.noise_reduction_odd_smoothing;
    config.noise_reduction.min_signal_remaining = cfg.noise_reduction_min_signal_remaining;
    // automatic gain control
    config.pcan_gain_control.enable_pcan = cfg.pcan_gain_control_enable_pcan;
    config.pcan_gain_control.strength = cfg.pcan_gain_control_strength;
    config.pcan_gain_control.offset = cfg.pcan_gain_control_offset ;
    config.pcan_gain_control.gain_bits = cfg.pcan_gain_control_gain_bits;
    // log scaling
    config.log_scale.enable_log = cfg.log_scale_enable_log;
    config.log_scale.scale_shift = cfg.log_scale_scale_shift;
    if (!FrontendPopulateState(&config, &g_micro_features_state,
                               cfg.sample_rate)) {
      LOGE("frontendPopulateState() failed");
      return kTfLiteError;
    }
    return kTfLiteOk;
  }
633 
  /// Runs the audio frontend (FFT + filterbank) on the input samples and
  /// rescales the feature values into the int8 range expected by the model.
  virtual TfLiteStatus generateMicroFeatures(const int16_t* input,
                                             int input_size, int8_t* output,
                                             int output_size,
                                             size_t* num_samples_read) {
    TRACED();
    const int16_t* frontend_input = input;

    // Apply FFT
    FrontendOutput frontend_output = FrontendProcessSamples(
        &g_micro_features_state, frontend_input, input_size, num_samples_read);

    // Check size
    if (output_size != frontend_output.size) {
      LOGE("output_size=%d, frontend_output.size=%d", output_size,
           frontend_output.size);
    }

    // printf("input_size: %d, num_samples_read: %d,output_size: %d,
    // frontend_output.size:%d \n", input_size, *num_samples_read, output_size,
    // frontend_output.size);

    // // check generated features
    // if (input_size != *num_samples_read){
    //   LOGE("audio_samples_size=%d vs num_samples_read=%d", input_size,
    //   *num_samples_read);
    // }

    for (size_t i = 0; i < frontend_output.size; ++i) {
      // These scaling values are derived from those used in input_data.py in
      // the training pipeline. The feature pipeline outputs 16-bit signed
      // integers in roughly a 0 to 670 range. In training, these are then
      // arbitrarily divided by 25.6 to get float values in the rough range of
      // 0.0 to 26.0. This scaling is performed for historical reasons, to match
      // up with the output of other feature generators. The process is then
      // further complicated when we quantize the model. This means we have to
      // scale the 0.0 to 26.0 real values to the -128 to 127 signed integer
      // numbers. All this means that to get matching values from our integer
      // feature output into the tensor input, we have to perform: input =
      // (((feature / 25.6) / 26.0) * 256) - 128 To simplify this and perform it
      // in 32-bit integer math, we rearrange to: input = (feature * 256) /
      // (25.6 * 26.0) - 128
      constexpr int32_t value_scale = 256;
      constexpr int32_t value_div =
          static_cast<int32_t>((25.6f * 26.0f) + 0.5f);
      // rounded integer division, then shift into the int8 range
      int32_t value =
          ((frontend_output.values[i] * value_scale) + (value_div / 2)) /
          value_div;
      value -= 128;
      // clamp to the int8 range
      if (value < -128) {
        value = -128;
      }
      if (value > 127) {
        value = 127;
      }
      output[i] = value;
    }

    return kTfLiteOk;
  }
693 
695  virtual void respondToCommand(const char* found_command, uint8_t score,
696  bool is_new_command) {
697  if (cfg.respondToCommand != nullptr) {
698  cfg.respondToCommand(found_command, score, is_new_command);
699  } else {
700  TRACED();
701  if (is_new_command) {
702  char buffer[80];
703  snprintf(buffer, 80, "Result: %s, score: %d, is_new: %s", found_command,
704  score, is_new_command ? "true" : "false");
705  Serial.println(buffer);
706  }
707  }
708  }
709 };
710 
 public:
  /// @param range     amplitude of the generated int16 sine values
  /// @param increment angle step added per generated sample (radians)
  TfLiteSineReader(int16_t range=32767, float increment=0.01 ){
    this->increment = increment;
    this->range = range;
  }
723 
  /// Caches the interpreter, its input/output tensors and the channel count
  /// from the parent stream.
  virtual bool begin(TfLiteAudioStreamBase *parent) override {
    // setup on first call
    p_interpreter = &parent->interpreter();
    input = p_interpreter->input(0);
    output = p_interpreter->output(0);
    channels = parent->config().channels;
    return true;
  }
732 
  /// Generates sampleCount samples: feeds the current angle into the model,
  /// dequantizes the predicted sine value into the int16 range and
  /// replicates it on all channels. Returns the number of samples written.
  virtual int read(int16_t*data, int sampleCount) override {
    TRACED();
    float two_pi = 2 * PI;
    for (int j=0; j<sampleCount; j+=channels){
      // Quantize the input from floating-point to integer
      input->data.int8[0] = TfLiteQuantizer::quantize(actX,input->params.scale, input->params.zero_point);

      // Invoke TF Model
      TfLiteStatus invoke_status = p_interpreter->Invoke();

      // Check the result
      if(kTfLiteOk!= invoke_status){
        LOGE("invoke_status not ok");
        return j;
      }
      if(kTfLiteInt8 != output->type){
        LOGE("Output type is not kTfLiteInt8");
        return j;
      }

      // Dequantize the output and convert it to the int16 range
      data[j] = TfLiteQuantizer::dequantizeToNewRange(output->data.int8[0], output->params.scale, output->params.zero_point, range);
      // printf("%d\n", data[j]); // for debugging using the Serial Plotter
      LOGD("%f->%d / %d->%d",actX, input->data.int8[0], output->data.int8[0], data[j]);
      // replicate the value on the remaining channels
      for (int i=1;i<channels;i++){
        data[j+i] = data[j];
        LOGD("generate data for channels");
      }
      // Increment X and wrap at 2*PI
      actX += increment;
      if (actX>two_pi){
        actX-=two_pi;
      }
    }
    return sampleCount;
  }
769 
770  protected:
771  float actX=0;
772  float increment=0.1;
773  int16_t range=0;
774  int channels;
775  TfLiteTensor* input = nullptr;
776  TfLiteTensor* output = nullptr;
777  tflite::MicroInterpreter* p_interpreter = nullptr;
778 };
779 
787  public:
788  TfLiteAudioStream() {}
789  ~TfLiteAudioStream() {
790  if (p_tensor_arena != nullptr) delete[] p_tensor_arena;
791  }
792 
793 
  /// Optionally define your own interpreter. Takes effect only when called
  /// before begin(): setupInterpreter() creates a default one only if none
  /// has been set.
  void setInterpreter(tflite::MicroInterpreter* p_interpreter) {
    TRACED();
    this->p_interpreter = p_interpreter;
  }
799 
800  // Provides the default configuration
801  virtual TfLiteConfig defaultConfig() override {
802  TfLiteConfig def;
803  return def;
804  }
805 
  /// Starts the processing: allocates the tensor arena, sets up the writer
  /// (when categories are defined), maps the model, builds the interpreter,
  /// allocates the tensors and starts the optional reader.
  virtual bool begin(TfLiteConfig config) override {
    TRACED();
    cfg = config;

    // allocate memory for the tensor arena
    // NOTE(review): calling begin() twice leaks the previous arena - there
    // is no null check or delete before this allocation.
    p_tensor_arena = new uint8_t[cfg.kTensorArenaSize];

    if (cfg.categoryCount()>0){

      // setup the feature provider
      if (!setupWriter()) {
        LOGE("setupWriter");
        return false;
      }
    } else {
      LOGW("categoryCount=%d", cfg.categoryCount());
    }

    // Map the model into a usable data structure. This doesn't involve any
    // copying or parsing, it's a very lightweight operation.
    if (!setModel(cfg.model)) {
      return false;
    }

    if (!setupInterpreter()) {
      return false;
    }

    // Allocate memory from the p_tensor_arena for the model's tensors.
    LOGI("AllocateTensors");
    TfLiteStatus allocate_status = p_interpreter->AllocateTensors();
    if (allocate_status != kTfLiteOk) {
      LOGE("AllocateTensors() failed");
      return false;
    }

    // Get information about the memory area to use for the model's input.
    LOGI("Get Input");
    p_tensor = p_interpreter->input(0);
    // the input tensor must be int8 with shape [1, feature elements]
    if (cfg.categoryCount()>0){
      if ((p_tensor->dims->size != 2) || (p_tensor->dims->data[0] != 1) ||
          (p_tensor->dims->data[1] !=
           (cfg.kFeatureSliceCount * cfg.kFeatureSliceSize)) ||
          (p_tensor->type != kTfLiteInt8)) {
        LOGE("Bad input tensor parameters in model");
        return false;
      }
    }

    LOGI("Get Buffer");
    p_tensor_buffer = p_tensor->data.int8;
    if (p_tensor_buffer == nullptr) {
      LOGE("p_tensor_buffer is null");
      return false;
    }

    // setup reader
    if (cfg.reader!=nullptr){
      cfg.reader->begin(this);
    }

    // all good if we made it here
    is_setup = true;
    LOGI("done");
    return true;
  }
873 
875  virtual int availableToWrite() override { return DEFAULT_BUFFER_SIZE; }
876 
878  virtual size_t write(const uint8_t* data, size_t len) override {
879  TRACED();
880  if (cfg.writer==nullptr){
881  LOGE("cfg.output is null");
882  return 0;
883  }
884  int16_t* samples = (int16_t*)data;
885  int16_t sample_count = len / 2;
886  for (int j = 0; j < sample_count; j++) {
887  cfg.writer->write(samples[j]);
888  }
889  return len;
890  }
891 
893  virtual int available() override { return cfg.reader != nullptr ? DEFAULT_BUFFER_SIZE : 0; }
894 
896  virtual size_t readBytes(uint8_t *data, size_t len) override {
897  TRACED();
898  if (cfg.reader!=nullptr){
899  return cfg.reader->read((int16_t*)data, (int) len/sizeof(int16_t)) * sizeof(int16_t);
900  }else {
901  return 0;
902  }
903  }
904 
906  tflite::MicroInterpreter& interpreter() override {
907  return *p_interpreter;
908  }
909 
911  TfLiteConfig &config() override {
912  return cfg;
913  }
914 
916  int8_t* modelInputBuffer() override {
917  return p_tensor_buffer;
918  }
919 
920  protected:
921  const tflite::Model* p_model = nullptr;
922  tflite::MicroInterpreter* p_interpreter = nullptr;
923  TfLiteTensor* p_tensor = nullptr;
924  bool is_setup = false;
925  TfLiteConfig cfg;
926  // Create an area of memory to use for input, output, and intermediate
927  // arrays. The size of this will depend on the model you're using, and may
928  // need to be determined by experimentation.
929  uint8_t* p_tensor_arena = nullptr;
930  int8_t* p_tensor_buffer = nullptr;
931 
932  virtual bool setModel(const unsigned char* model) {
933  TRACED();
934  p_model = tflite::GetModel(model);
935  if (p_model->version() != TFLITE_SCHEMA_VERSION) {
936  LOGE(
937  "Model provided is schema version %d not equal "
938  "to supported version %d.",
939  p_model->version(), TFLITE_SCHEMA_VERSION);
940  return false;
941  }
942  return true;
943  }
944 
  /// Makes sure a writer is available: installs the default
  /// TfLiteMicroSpeachWriter when the user did not provide one, then starts it.
  virtual bool setupWriter() {
    if (cfg.writer == nullptr) {
      // static so it outlives this stream (shared by all instances)
      static TfLiteMicroSpeachWriter writer;
      cfg.writer = &writer;
    }
    return cfg.writer->begin(this);
  }
952 
953  // Pull in only the operation implementations we need.
954  // This relies on a complete list of all the ops needed by this graph.
955  // An easier approach is to just use the AllOpsResolver, but this will
956  // incur some penalty in code space for op implementations that are not
957  // needed by this graph.
958  //
  // Pull in only the operation implementations we need.
  // This relies on a complete list of all the ops needed by this graph.
  // An easier approach is to just use the AllOpsResolver, but this will
  // incur some penalty in code space for op implementations that are not
  // needed by this graph.
  //
  // NOTE(review): the interpreter is a function-local static, so it is built
  // only once - configuration changes after the first call are ignored.
  virtual bool setupInterpreter() {
    if (p_interpreter == nullptr) {
      TRACEI();
      if (cfg.useAllOpsResolver) {
        tflite::AllOpsResolver resolver;
        static tflite::MicroInterpreter static_interpreter{
            p_model, resolver, p_tensor_arena, cfg.kTensorArenaSize};
        p_interpreter = &static_interpreter;
      } else {
        // NOLINTNEXTLINE(runtime-global-variables)
        // resolver with only the 4 ops used by the micro_speech graph
        static tflite::MicroMutableOpResolver<4> micro_op_resolver{};
        if (micro_op_resolver.AddDepthwiseConv2D() != kTfLiteOk) {
          return false;
        }
        if (micro_op_resolver.AddFullyConnected() != kTfLiteOk) {
          return false;
        }
        if (micro_op_resolver.AddSoftmax() != kTfLiteOk) {
          return false;
        }
        if (micro_op_resolver.AddReshape() != kTfLiteOk) {
          return false;
        }
        // Build an p_interpreter to run the model with.
        static tflite::MicroInterpreter static_interpreter{
            p_model, micro_op_resolver, p_tensor_arena, cfg.kTensorArenaSize};
        p_interpreter = &static_interpreter;
      }
    }
    return true;
  }
990 };
991 
992 } // namespace audio_tools
Base class for all Audio Streams. It support the boolean operator to test if the object is ready with...
Definition: BaseStream.h:109
virtual int readArray(T data[], int len)
reads multiple values
Definition: Buffers.h:41
virtual int writeArray(const T data[], int len)
Fills the buffer data.
Definition: Buffers.h:65
virtual int availableForWrite()
provides the number of entries that are available to write
Definition: Buffers.h:369
virtual bool write(T data)
write add an entry to the buffer
Definition: Buffers.h:347
Base class for implementing different primitive decoding models on top of the instantaneous results f...
Definition: TfLiteAudioStream.h:193
Abstract TfLiteAudioStream to provide access to TfLiteAudioStream for Readers and Writers.
Definition: TfLiteAudioStream.h:385
virtual int8_t * modelInputBuffer()=0
Provides access to the model input buffer.
virtual TfLiteConfig & config()=0
Provides the TfLiteConfig information.
virtual size_t write(const uint8_t *data, size_t len)=0
process the data in batches of max kMaxAudioSampleSize.
TfLiteAudioStream which uses Tensorflow Light to analyze the data. If it is used as a generator (wher...
Definition: TfLiteAudioStream.h:786
virtual size_t write(const uint8_t *data, size_t len) override
process the data in batches of max kMaxAudioSampleSize.
Definition: TfLiteAudioStream.h:878
virtual size_t readBytes(uint8_t *data, size_t len) override
provide audio data with cfg.input
Definition: TfLiteAudioStream.h:896
virtual bool begin(TfLiteConfig config) override
Start the processing.
Definition: TfLiteAudioStream.h:807
tflite::MicroInterpreter & interpreter() override
Provides the tf lite interpreter.
Definition: TfLiteAudioStream.h:906
void setInterpreter(tflite::MicroInterpreter *p_interpreter)
Optionally define your own p_interpreter.
Definition: TfLiteAudioStream.h:795
int8_t * modelInputBuffer() override
Provides access to the model input buffer.
Definition: TfLiteAudioStream.h:916
TfLiteConfig & config() override
Provides the TfLiteConfig information.
Definition: TfLiteAudioStream.h:911
virtual int availableToWrite() override
Constant streaming.
Definition: TfLiteAudioStream.h:875
virtual int available() override
We can provide only some audio data when cfg.input is defined.
Definition: TfLiteAudioStream.h:893
TfLiteMicroSpeachWriter for Audio Data.
Definition: TfLiteAudioStream.h:409
void printFeatures()
For debugging: print feature matrix.
Definition: TfLiteAudioStream.h:598
virtual bool write1(const int16_t sample)
Processes a single sample.
Definition: TfLiteAudioStream.h:504
virtual bool begin(TfLiteAudioStreamBase *parent)
Call begin before starting the processing.
Definition: TfLiteAudioStream.h:419
virtual void respondToCommand(const char *found_command, uint8_t score, bool is_new_command)
Overwrite this method to implement your own handler or provide callback.
Definition: TfLiteAudioStream.h:695
This class is designed to apply a very primitive decoding model on top of the instantaneous results f...
Definition: TfLiteAudioStream.h:214
TfLiteStatus validate(const TfLiteTensor *latest_results)
Checks the input data.
Definition: TfLiteAudioStream.h:347
TfLiteStatus evaluate(const char **found_command, uint8_t *result_score, bool *is_new_command)
Finds the result.
Definition: TfLiteAudioStream.h:301
void deleteOldRecords(int32_t limit)
Removes obsolete records from the queue.
Definition: TfLiteAudioStream.h:293
int categoryCount()
Determines the number of categories.
Definition: TfLiteAudioStream.h:288
bool begin(TfLiteConfig cfg) override
Setup parameters from config.
Definition: TfLiteAudioStream.h:221
int resultCategoryIdx(int8_t *score)
finds the category with the biggest score
Definition: TfLiteAudioStream.h:276
Quantizer that helps to quantize and dequantize between float and int8.
Definition: TfLiteAudioStream.h:160
Input class which provides the next value if the TfLiteAudioStream is treated as an audio source.
Definition: TfLiteAudioStream.h:41
Generate a sine output from a model that was trained on the sine method. (=hello_world)
Definition: TfLiteAudioStream.h:718
Output class which interprets audio data if TfLiteAudioStream is treated as audio sink.
Definition: TfLiteAudioStream.h:53
Vector implementation which provides the most important methods as defined by std::vector....
Definition: Vector.h:21
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition: AudioConfig.h:823
Configuration settings for TfLiteAudioStream.
Definition: TfLiteAudioStream.h:66
void setCategories(const char *(&array)[N])
Defines the labels.
Definition: TfLiteAudioStream.h:128