arduino-audio-tools
TfLiteAudioStream.h
1 #pragma once
2 
3 // Configure FFT to output 16 bit fixed point.
4 #define FIXED_POINT 16
5 
#include <TensorFlowLite.h>

#include <cmath>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <limits>

#include "AudioTools/AudioOutput.h"
#include "AudioTools/Buffers.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/system_setup.h"
#include "tensorflow/lite/schema/schema_generated.h"
21 
29 namespace audio_tools {
30 
31 // Forward Declarations
32 class TfLiteAudioStreamBase;
33 class TfLiteAbstractRecognizeCommands;
34 
/// Input class which provides the next values if the TfLiteAudioStream is
/// treated as an audio source.
class TfLiteReader {
 public:
  /// Called once before read(); receives the owning stream.
  virtual bool begin(TfLiteAudioStreamBase *parent) = 0;
  /// Fills data with up to len samples; returns the number of samples provided.
  virtual int read(int16_t*data, int len) = 0;
};
46 
/// Output class which interprets audio data if TfLiteAudioStream is treated
/// as an audio sink.
class TfLiteWriter {
 public:
  /// Called once before write(); receives the owning stream.
  virtual bool begin(TfLiteAudioStreamBase *parent) = 0;
  /// Processes a single audio sample.
  virtual bool write(const int16_t sample) = 0;
};
64 class TfLiteAudioErrorReporter : public tflite::ErrorReporter {
65  public:
66  virtual ~TfLiteAudioErrorReporter() {}
67  virtual int Report(const char* format, va_list args) override {
68  int result = snprintf(msg, 200, format, args);
69  LOGE(msg);
70  return result;
71  }
72 
73  protected:
74  char msg[200];
75 } my_error_reporter;
76 tflite::ErrorReporter* error_reporter = &my_error_reporter;
77 
/// Configuration settings for TfLiteAudioStream.
struct TfLiteConfig {
  // TensorFlow Lite model data (flatbuffer) to be used
  const unsigned char* model = nullptr;
  // optional source which generates audio data from the model
  TfLiteReader *reader = nullptr;
  // optional sink which feeds audio data into the model
  TfLiteWriter *writer = nullptr;
  // command recognizer; a default one is created when left undefined
  TfLiteAbstractRecognizeCommands *recognizeCommands=nullptr;
  // use the (larger) AllOpsResolver instead of an explicit op list
  bool useAllOpsResolver = false;
  // callback for command handler
  void (*respondToCommand)(const char* found_command, uint8_t score,
  bool is_new_command) = nullptr;

  // Create an area of memory to use for input, output, and intermediate arrays.
  // The size of this will depend on the model you’re using, and may need to be
  // determined by experimentation.
  int kTensorArenaSize = 10 * 1024;

  // Keeping these as constant expressions allow us to allocate fixed-sized
  // arrays on the stack for our working memory.

  // The size of the input time series data we pass to the FFT to produce
  // the frequency information. This has to be a power of two, and since
  // we're dealing with 30ms of 16KHz inputs, which means 480 samples, this
  // is the next value.
  // int kMaxAudioSampleSize = 320; //512; // 480
  int sample_rate = 16000;

  // Number of audio channels - is usually 1. If 2 we reduce it to 1 by
  // averaging the 2 channels
  int channels = 1;

  // The following values are derived from values used during model training.
  // If you change the way you preprocess the input, update all these constants.
  int kFeatureSliceSize = 40;
  int kFeatureSliceCount = 49;
  int kFeatureSliceStrideMs = 20;
  int kFeatureSliceDurationMs = 30;

  // number of new slices to collect before evaluating the model
  int kSlicesToProcess = 2;

  // Parameters for RecognizeCommands
  int32_t average_window_duration_ms = 1000;
  uint8_t detection_threshold = 50;
  int32_t suppression_ms = 1500;
  int32_t minimum_count = 3;

  // input for FrontendConfig
  float filterbank_lower_band_limit = 125.0;
  float filterbank_upper_band_limit = 7500.0;
  float noise_reduction_smoothing_bits = 10;
  float noise_reduction_even_smoothing = 0.025;
  float noise_reduction_odd_smoothing = 0.06;
  float noise_reduction_min_signal_remaining = 0.05;
  bool pcan_gain_control_enable_pcan = 1;
  float pcan_gain_control_strength = 0.95;
  float pcan_gain_control_offset = 80.0;
  float pcan_gain_control_gain_bits = 21;
  bool log_scale_enable_log = 1;
  uint8_t log_scale_scale_shift = 6;

  /// Defines the labels: stores the array pointer and its size.
  template<int N>
  void setCategories(const char* (&array)[N]){
  labels = array;
  kCategoryCount = N;
  }

  /// Number of categories defined via setCategories().
  int categoryCount() {
  return kCategoryCount;
  }

  /// Total number of int8 feature values (slices x slice size).
  int featureElementCount() {
  return kFeatureSliceSize * kFeatureSliceCount;
  }

  /// Number of samples in one feature window.
  int audioSampleSize() {
  return kFeatureSliceDurationMs * (sample_rate / 1000);
  }

  /// Number of samples the window advances per slice.
  int strideSampleSize() {
  return kFeatureSliceStrideMs * (sample_rate / 1000);
  }

  private:
  // NOTE(review): labels is read directly (cfg.labels) by the recognizer -
  // presumably a friend declaration exists in the lines not visible in this
  // listing; verify against the full header.
  int kCategoryCount = 0;
  const char** labels = nullptr;
};
172 
180  public:
181  // convert float to int8
182  static int8_t quantize(float value, float scale, float zero_point){
183  if(scale==0.0&&zero_point==0) return value;
184  return value / scale + zero_point;
185  }
186  // convert int8 to float
187  static float dequantize(int8_t value, float scale, float zero_point){
188  if(scale==0.0&&zero_point==0) return value;
189  return (value - zero_point) * scale;
190  }
191 
192  static float dequantizeToNewRange(int8_t value, float scale, float zero_point, float new_range){
193  float deq = (static_cast<float>(value) - zero_point) * scale;
194  return clip(deq * new_range, new_range);
195  }
196 
197  static float clip(float value, float range){
198  if (value>=0.0){
199  return value > range ? range : value;
200  } else {
201  return -value < -range ? -range : value;
202  }
203  }
204 };
205 
 public:
  /// Setup with the provided configuration; returns true on success.
  virtual bool begin(TfLiteConfig cfg) = 0;
  /// Evaluates the latest model output tensor and reports the detected
  /// command, its score and whether it is a newly detected command.
  virtual TfLiteStatus getCommand(const TfLiteTensor* latest_results, const int32_t current_time_ms,
  const char** found_command,uint8_t* score,bool* is_new_command) = 0;

};
219 
234  public:
235 
237  }
238 
240  bool begin(TfLiteConfig cfg) override {
241  TRACED();
242  this->cfg = cfg;
243  if (cfg.labels == nullptr) {
244  LOGE("config.labels not defined");
245  return false;
246  }
247  return true;
248  }
249 
  // Call this with the results of running a model on sample data.
  /// Records the latest model output in the sliding result window and then
  /// determines the resulting command. Results must arrive with increasing
  /// timestamps.
  virtual TfLiteStatus getCommand(const TfLiteTensor* latest_results,
                                  const int32_t current_time_ms,
                                  const char** found_command,
                                  uint8_t* score,
                                  bool* is_new_command) override {

    TRACED();
    this->current_time_ms = current_time_ms;
    // time elapsed since the last reported top result
    this->time_since_last_top = current_time_ms - previous_time_ms;

    // drop entries that fell out of the averaging window
    deleteOldRecords(current_time_ms - cfg.average_window_duration_ms);
    // queue the winning category of the current result
    // NOTE(review): idx can be -1 when no category wins - that would index
    // data.int8[-1] below; confirm resultCategoryIdx() cannot return -1 here.
    int idx = resultCategoryIdx(latest_results->data.int8);
    Result row(current_time_ms, idx, latest_results->data.int8[idx]);
    result_queue.push_back(row);

    // validation happens after the queue was already updated
    TfLiteStatus result = validate(latest_results);
    if (result!=kTfLiteOk){
      return result;
    }
    return evaluate(found_command, score, is_new_command);
  }
272 
273  protected:
274  struct Result {
275  int32_t time_ms;
276  int category=0;
277  int8_t score=0;
278 
279  Result() = default;
280  Result(int32_t time_ms,int category, int8_t score){
281  this->time_ms = time_ms;
282  this->category = category;
283  this->score = score;
284  }
285  };
286 
  TfLiteConfig cfg;
  // sliding window of results inside cfg.average_window_duration_ms
  Vector <Result> result_queue;
  // category reported last (note: historical typo in the member name is kept
  // for compatibility with potential subclasses)
  int previous_cateogory=-1;
  int32_t current_time_ms=0;
  // timestamp of the last reported command
  int32_t previous_time_ms=0;
  int32_t time_since_last_top=0;
293 
295  int resultCategoryIdx(int8_t* score) {
296  int result = -1;
297  uint8_t top_score = std::numeric_limits<uint8_t>::min();
298  for (int j=0;j<categoryCount();j++){
299  if (score[j]>top_score){
300  result = j;
301  }
302  }
303  return result;
304  }
305 
308  return cfg.categoryCount();
309  }
310 
312  void deleteOldRecords(int32_t limit) {
313  while (result_queue[0].time_ms<limit){
314  result_queue.pop_front();
315  }
316  }
317 
  /// Finds the result: sums the queued scores per category, picks the
  /// category with the highest total and decides whether it counts as a new
  /// command (changed category, above threshold, outside suppression time).
  TfLiteStatus evaluate(const char** found_command, uint8_t* result_score, bool* is_new_command) {
    TRACED();
    // NOTE(review): variable-length arrays are a gcc extension in C++ -
    // sized by the configured category count.
    float totals[categoryCount()]={0};
    int count[categoryCount()]={0};
    // calculate totals
    for (int j=0;j<result_queue.size();j++){
      int idx = result_queue[j].category;
      totals[idx] += result_queue[j].score;
      count[idx]++;
    }

    // find max
    int maxIdx = -1;
    float max = -100000;
    for (int j=0;j<categoryCount();j++){
      if (totals[j]>max){
        max = totals[j];
        maxIdx = j;
      }
    }

    if (maxIdx==-1){
      LOGE("Could not find max category")
      return kTfLiteError;
    }

    // determine result: average score of the winning category
    // NOTE(review): totals can be negative (scores are int8_t) while
    // result_score is uint8_t - a negative average would wrap; confirm that
    // only non-negative winning scores are expected here.
    *result_score = totals[maxIdx] / count[maxIdx];
    *found_command = cfg.labels[maxIdx];

    if (previous_cateogory!=maxIdx
        && *result_score > cfg.detection_threshold
        && time_since_last_top > cfg.suppression_ms){
      previous_time_ms = current_time_ms;
      previous_cateogory = maxIdx;
      *is_new_command = true;
    } else {
      *is_new_command = false;
    }

    LOGD("Category: %s, score: %d, is_new: %d",*found_command, *result_score, *is_new_command);

    return kTfLiteOk;
  }
363 
  /// Checks the input data: the result tensor must have the shape
  /// [1, categoryCount()], be of type int8 and arrive in increasing time
  /// order relative to the queued results.
  TfLiteStatus validate(const TfLiteTensor* latest_results) {
    if ((latest_results->dims->size != 2) ||
        (latest_results->dims->data[0] != 1) ||
        (latest_results->dims->data[1] != categoryCount())) {
      LOGE(
          "The results for recognition should contain %d "
          "elements, but there are "
          "%d in an %d-dimensional shape",
          categoryCount(), (int)latest_results->dims->data[1],
          (int)latest_results->dims->size);
      return kTfLiteError;
    }

    if (latest_results->type != kTfLiteInt8) {
      LOGE("The results for recognition should be int8 elements, but are %d",
           (int)latest_results->type);
      return kTfLiteError;
    }

    if ((!result_queue.empty()) &&
        (current_time_ms < result_queue[0].time_ms)) {
      LOGE("Results must be in increasing time order: timestamp %d < %d",
           (int)current_time_ms, (int)result_queue[0].time_ms);
      return kTfLiteError;
    }
    return kTfLiteOk;
  }
392 
393 };
394 
395 
 public:
  /// Optionally define your own interpreter.
  virtual void setInterpreter(tflite::MicroInterpreter* p_interpreter) = 0;
  /// Provides the default configuration.
  virtual TfLiteConfig defaultConfig() = 0;
  /// Starts the processing with the provided configuration.
  virtual bool begin(TfLiteConfig config) = 0;
  virtual int availableToWrite() = 0;

  /// process the data in batches of max kMaxAudioSampleSize.
  virtual size_t write(const uint8_t* audio, size_t bytes)= 0;
  /// Provides the tf lite interpreter.
  virtual tflite::MicroInterpreter& interpreter()= 0;

  /// Provides the TfLiteConfig information.
  virtual TfLiteConfig &config()= 0;

  /// Provides access to the model input buffer.
  virtual int8_t* modelInputBuffer()= 0;
};
420 
428  public:
429  TfLiteMicroSpeachWriter() = default;
430 
432  if (p_buffer != nullptr) delete p_buffer;
433  if (p_audio_samples != nullptr) delete p_audio_samples;
434  }
435 
  /// Call begin before starting the processing: derives the window sizes from
  /// the configuration, sets up the recognizer and the feature frontend, and
  /// allocates the ring buffer, feature matrix and sample buffer.
  virtual bool begin(TfLiteAudioStreamBase *parent) {
    TRACED();
    this->parent = parent;
    cfg = parent->config();
    current_time = 0;
    // samples per feature window and per window advance
    kMaxAudioSampleSize = cfg.audioSampleSize();
    kStrideSampleSize = cfg.strideSampleSize();
    // overlapping samples that are re-processed in the next window
    kKeepSampleSize = kMaxAudioSampleSize - kStrideSampleSize;

    if (!setup_recognizer()) {
      LOGE("setup_recognizer");
      return false;
    }

    // setup FrontendConfig
    TfLiteStatus init_status = initializeMicroFeatures();
    if (init_status != kTfLiteOk) {
      return false;
    }

    // Allocate ring buffer
    if (p_buffer == nullptr) {
      p_buffer = new audio_tools::RingBuffer<int16_t>(kMaxAudioSampleSize);
      LOGD("Allocating buffer for %d samples", kMaxAudioSampleSize);
    }

    // Initialize the feature data to default values.
    if (p_feature_data == nullptr) {
      p_feature_data = new int8_t[cfg.featureElementCount()];
      memset(p_feature_data, 0, cfg.featureElementCount());
    }

    // allocate p_audio_samples
    if (p_audio_samples == nullptr) {
      p_audio_samples = new int16_t[kMaxAudioSampleSize];
      memset(p_audio_samples, 0, kMaxAudioSampleSize * sizeof(int16_t));
    }

    return true;
  }
477 
  /// Processes a single sample. Whenever the ring buffer is full (one stride
  /// of new samples collected), a new feature slice is computed and - after
  /// cfg.kSlicesToProcess new slices - the model is evaluated.
  virtual bool write(int16_t sample) {
    TRACED();
    if (!write1(sample)){
      // buffer full: a complete stride was captured - advance the time
      current_time += cfg.kFeatureSliceStrideMs;
      // determine slice
      total_slice_count++;

      int8_t* feature_buffer = addSlice();
      if (total_slice_count >= cfg.kSlicesToProcess) {
        processSlices(feature_buffer);
        // reset total_slice_count
        total_slice_count = 0;
      }
    }
    return true;
  }
495 
 protected:
  TfLiteConfig cfg;
  TfLiteAudioStreamBase *parent=nullptr;
  // spectrogram feature matrix (kFeatureSliceCount x kFeatureSliceSize)
  int8_t* p_feature_data = nullptr;
  // scratch buffer holding the samples of the current audio window
  int16_t* p_audio_samples = nullptr;
  audio_tools::RingBuffer<int16_t>* p_buffer = nullptr;
  FrontendState g_micro_features_state;
  // frontend configuration used by initializeMicroFeatures()
  FrontendConfig config;
  int kMaxAudioSampleSize;  // samples per feature window
  int kStrideSampleSize;    // samples per window advance
  int kKeepSampleSize;      // overlapping samples kept for the next window
  int16_t last_value;       // first channel value when averaging 2 channels
  int8_t channel = 0;
  int32_t current_time = 0;        // running time in ms
  int16_t total_slice_count = 0;   // new slices since the last model run
511 
512  virtual bool setup_recognizer() {
513  // setup default p_recognizer if not defined
514  if (cfg.recognizeCommands == nullptr) {
515  static TfLiteMicroSpeechRecognizeCommands static_recognizer;
516  cfg.recognizeCommands = &static_recognizer;
517  }
518  return cfg.recognizeCommands->begin(cfg);
519  }
520 
522  virtual bool write1(const int16_t sample) {
523  if (cfg.channels == 1) {
524  p_buffer->write(sample);
525  } else {
526  if (channel == 0) {
527  last_value = sample;
528  channel = 1;
529  } else
530  // calculate avg of 2 channels and convert it to int8_t
531  p_buffer->write(((sample / 2) + (last_value / 2)));
532  channel = 0;
533  }
534  return p_buffer->availableForWrite() > 0;
535  }
536 
  // If we can avoid recalculating some slices, just move the existing
  // data up in the spectrogram, to perform something like this: last time
  // = 80ms current time = 120ms
  // +-----------+ +-----------+
  // | data@20ms | --> | data@60ms |
  // +-----------+ -- +-----------+
  // | data@40ms | -- --> | data@80ms |
  // +-----------+ -- -- +-----------+
  // | data@60ms | -- -- | <empty> |
  // +-----------+ -- +-----------+
  // | data@80ms | -- | <empty> |
  // +-----------+ +-----------+
  /// Shifts the feature matrix up by one slice, pulls one window of samples
  /// from the ring buffer and computes the newest feature slice at the end
  /// of the matrix. Returns the feature matrix.
  virtual int8_t* addSlice() {
    TRACED();
    // shift p_feature_data up by one slice
    memmove(p_feature_data, p_feature_data + cfg.kFeatureSliceSize,
            (cfg.kFeatureSliceCount - 1) * cfg.kFeatureSliceSize);

    // copy data from buffer to p_audio_samples
    int audio_samples_size =
        p_buffer->readArray(p_audio_samples, kMaxAudioSampleSize);

    // check size
    if (audio_samples_size != kMaxAudioSampleSize) {
      LOGE("audio_samples_size=%d != kMaxAudioSampleSize=%d",
           audio_samples_size, kMaxAudioSampleSize);
    }

    // keep some data to be reprocessed - move by kStrideSampleSize
    p_buffer->writeArray(p_audio_samples + kStrideSampleSize, kKeepSampleSize);

    // the new slice data will always be stored at the end
    int8_t* new_slice_data =
        p_feature_data + ((cfg.kFeatureSliceCount - 1) * cfg.kFeatureSliceSize);
    size_t num_samples_read = 0;
    if (generateMicroFeatures(p_audio_samples, audio_samples_size,
                              new_slice_data, cfg.kFeatureSliceSize,
                              &num_samples_read) != kTfLiteOk) {
      LOGE("Error generateMicroFeatures");
    }
    // printFeatures();
    return p_feature_data;
  }
580 
  // Process multiple slices of audio data
  /// Copies the feature matrix into the model input, runs the model and
  /// forwards the recognized command to respondToCommand().
  virtual bool processSlices(int8_t* feature_buffer) {
    LOGI("->slices: %d", total_slice_count);
    // Copy feature buffer to input tensor
    memcpy(parent->modelInputBuffer(), feature_buffer, cfg.featureElementCount());

    // Run the model on the spectrogram input and make sure it succeeds.
    TfLiteStatus invoke_status = parent->interpreter().Invoke();
    if (invoke_status != kTfLiteOk) {
      LOGE("Invoke failed");
      return false;
    }

    // Obtain a pointer to the output tensor
    TfLiteTensor* output = parent->interpreter().output(0);

    // Determine whether a command was recognized
    const char* found_command = nullptr;
    uint8_t score = 0;
    bool is_new_command = false;

    TfLiteStatus process_status = cfg.recognizeCommands->getCommand(
        output, current_time, &found_command, &score, &is_new_command);
    if (process_status != kTfLiteOk) {
      LOGE("TfLiteMicroSpeechRecognizeCommands::getCommand() failed");
      return false;
    }
    // Do something based on the recognized command. The default
    // implementation just prints to the error console, but you should replace
    // this with your own function for a real application.
    respondToCommand(found_command, score, is_new_command);
    return true;
  }
614 
616  void printFeatures() {
617  for (int i = 0; i < cfg.kFeatureSliceCount; i++) {
618  for (int j = 0; j < cfg.kFeatureSliceSize; j++) {
619  Serial.print(p_feature_data[(i * cfg.kFeatureSliceSize) + j]);
620  Serial.print(" ");
621  }
622  Serial.println();
623  }
624  Serial.println("------------");
625  }
626 
  /// Transfers the configuration values into the FrontendConfig and
  /// populates the frontend state used for feature generation.
  virtual TfLiteStatus initializeMicroFeatures() {
    TRACED();
    config.window.size_ms = cfg.kFeatureSliceDurationMs;
    config.window.step_size_ms = cfg.kFeatureSliceStrideMs;
    config.filterbank.num_channels = cfg.kFeatureSliceSize;
    config.filterbank.lower_band_limit = cfg.filterbank_lower_band_limit;
    config.filterbank.upper_band_limit = cfg.filterbank_upper_band_limit;
    config.noise_reduction.smoothing_bits = cfg.noise_reduction_smoothing_bits;
    config.noise_reduction.even_smoothing = cfg.noise_reduction_even_smoothing;
    config.noise_reduction.odd_smoothing = cfg.noise_reduction_odd_smoothing;
    config.noise_reduction.min_signal_remaining = cfg.noise_reduction_min_signal_remaining;
    config.pcan_gain_control.enable_pcan = cfg.pcan_gain_control_enable_pcan;
    config.pcan_gain_control.strength = cfg.pcan_gain_control_strength;
    config.pcan_gain_control.offset = cfg.pcan_gain_control_offset ;
    config.pcan_gain_control.gain_bits = cfg.pcan_gain_control_gain_bits;
    config.log_scale.enable_log = cfg.log_scale_enable_log;
    config.log_scale.scale_shift = cfg.log_scale_scale_shift;
    if (!FrontendPopulateState(&config, &g_micro_features_state,
                               cfg.sample_rate)) {
      LOGE("frontendPopulateState() failed");
      return kTfLiteError;
    }
    return kTfLiteOk;
  }
651 
  /// Converts one window of audio samples into a single feature slice by
  /// running the microfrontend and rescaling its 16-bit output into the int8
  /// range expected by the model.
  virtual TfLiteStatus generateMicroFeatures(const int16_t* input,
                                             int input_size, int8_t* output,
                                             int output_size,
                                             size_t* num_samples_read) {
    TRACED();
    const int16_t* frontend_input = input;

    // Apply FFT
    FrontendOutput frontend_output = FrontendProcessSamples(
        &g_micro_features_state, frontend_input, input_size, num_samples_read);

    // Check size
    if (output_size != frontend_output.size) {
      LOGE("output_size=%d, frontend_output.size=%d", output_size,
           frontend_output.size);
    }

    // printf("input_size: %d, num_samples_read: %d,output_size: %d,
    // frontend_output.size:%d \n", input_size, *num_samples_read, output_size,
    // frontend_output.size);

    // // check generated features
    // if (input_size != *num_samples_read){
    // LOGE("audio_samples_size=%d vs num_samples_read=%d", input_size,
    // *num_samples_read);
    // }

    for (size_t i = 0; i < frontend_output.size; ++i) {
      // These scaling values are derived from those used in input_data.py in
      // the training pipeline. The feature pipeline outputs 16-bit signed
      // integers in roughly a 0 to 670 range. In training, these are then
      // arbitrarily divided by 25.6 to get float values in the rough range of
      // 0.0 to 26.0. This scaling is performed for historical reasons, to match
      // up with the output of other feature generators. The process is then
      // further complicated when we quantize the model. This means we have to
      // scale the 0.0 to 26.0 real values to the -128 to 127 signed integer
      // numbers. All this means that to get matching values from our integer
      // feature output into the tensor input, we have to perform: input =
      // (((feature / 25.6) / 26.0) * 256) - 128 To simplify this and perform it
      // in 32-bit integer math, we rearrange to: input = (feature * 256) /
      // (25.6 * 26.0) - 128
      constexpr int32_t value_scale = 256;
      constexpr int32_t value_div =
          static_cast<int32_t>((25.6f * 26.0f) + 0.5f);
      int32_t value =
          ((frontend_output.values[i] * value_scale) + (value_div / 2)) /
          value_div;
      value -= 128;
      // clamp the result into the int8 range
      if (value < -128) {
        value = -128;
      }
      if (value > 127) {
        value = 127;
      }
      output[i] = value;
    }

    return kTfLiteOk;
  }
711 
713  virtual void respondToCommand(const char* found_command, uint8_t score,
714  bool is_new_command) {
715  if (cfg.respondToCommand != nullptr) {
716  cfg.respondToCommand(found_command, score, is_new_command);
717  } else {
718  TRACED();
719  if (is_new_command) {
720  char buffer[80];
721  snprintf(buffer, 80, "Result: %s, score: %d, is_new: %s", found_command,
722  score, is_new_command ? "true" : "false");
723  Serial.println(buffer);
724  }
725  }
726  }
727 };
728 
737  public: TfLiteSineReader(int16_t range=32767, float increment=0.01 ){
738  this->increment = increment;
739  this->range = range;
740  }
741 
  /// Setup on first call: caches the interpreter, its input/output tensors
  /// and the channel count from the parent stream.
  virtual bool begin(TfLiteAudioStreamBase *parent) override {
    // setup on first call
    p_interpreter = &parent->interpreter();
    input = p_interpreter->input(0);
    output = p_interpreter->output(0);
    channels = parent->config().channels;
    return true;
  }
750 
  /// Generates sampleCount samples by feeding the running phase value into
  /// the model and dequantizing the prediction into the int16 range; the
  /// same value is written to all configured channels. Returns the number
  /// of samples generated (less than sampleCount on error).
  virtual int read(int16_t*data, int sampleCount) override {
    TRACED();
    float two_pi = 2 * PI;
    for (int j=0; j<sampleCount; j+=channels){
      // Quantize the input from floating-point to integer
      input->data.int8[0] = TfLiteQuantizer::quantize(actX,input->params.scale, input->params.zero_point);

      // Invoke TF Model
      TfLiteStatus invoke_status = p_interpreter->Invoke();

      // Check the result
      if(kTfLiteOk!= invoke_status){
        LOGE("invoke_status not ok");
        return j;
      }
      if(kTfLiteInt8 != output->type){
        LOGE("Output type is not kTfLiteInt8");
        return j;
      }

      // Dequantize the output and convert it to the int16 range
      data[j] = TfLiteQuantizer::dequantizeToNewRange(output->data.int8[0], output->params.scale, output->params.zero_point, range);
      // printf("%d\n", data[j]); // for debugging using the Serial Plotter
      LOGD("%f->%d / %d->%d",actX, input->data.int8[0], output->data.int8[0], data[j]);
      // duplicate the sample for the remaining channels
      for (int i=1;i<channels;i++){
        data[j+i] = data[j];
        LOGD("generate data for channels");
      }
      // Increment X and wrap the phase at 2*PI
      actX += increment;
      if (actX>two_pi){
        actX-=two_pi;
      }
    }
    return sampleCount;
  }
787 
 protected:
  float actX=0;          // current phase (radians)
  float increment=0.1;   // overwritten by the constructor default (0.01)
  int16_t range=0;       // output amplitude in the int16 domain
  int channels;          // set in begin() from the parent configuration
  TfLiteTensor* input = nullptr;
  TfLiteTensor* output = nullptr;
  tflite::MicroInterpreter* p_interpreter = nullptr;
796 };
797 
 public:
  TfLiteAudioStream() {}
  /// Releases the tensor arena allocated in begin().
  ~TfLiteAudioStream() {
    if (p_tensor_arena != nullptr) delete[] p_tensor_arena;
  }
810 
811 
  /// Optionally define your own p_interpreter (call before begin()).
  void setInterpreter(tflite::MicroInterpreter* p_interpreter) {
    TRACED();
    this->p_interpreter = p_interpreter;
  }
817 
  // Provides the default configuration
  /// Returns a default-constructed TfLiteConfig.
  virtual TfLiteConfig defaultConfig() override {
    TfLiteConfig def;
    return def;
  }
823 
825  virtual bool begin(TfLiteConfig config) override {
826  TRACED();
827  cfg = config;
828 
829  // alloatme memory
830  p_tensor_arena = new uint8_t[cfg.kTensorArenaSize];
831 
832  if (cfg.categoryCount()>0){
833 
834  // setup the feature provider
835  if (!setupWriter()) {
836  LOGE("setupWriter");
837  return false;
838  }
839  } else {
840  LOGW("categoryCount=%d", cfg.categoryCount());
841  }
842 
843  // Map the model into a usable data structure. This doesn't involve any
844  // copying or parsing, it's a very lightweight operation.
845  if (!setModel(cfg.model)) {
846  return false;
847  }
848 
849  if (!setupInterpreter()) {
850  return false;
851  }
852 
853  // Allocate memory from the p_tensor_arena for the model's tensors.
854  LOGI("AllocateTensors");
855  TfLiteStatus allocate_status = p_interpreter->AllocateTensors();
856  if (allocate_status != kTfLiteOk) {
857  LOGE("AllocateTensors() failed");
858  return false;
859  }
860 
861  // Get information about the memory area to use for the model's input.
862  LOGI("Get Input");
863  p_tensor = p_interpreter->input(0);
864  if (cfg.categoryCount()>0){
865  if ((p_tensor->dims->size != 2) || (p_tensor->dims->data[0] != 1) ||
866  (p_tensor->dims->data[1] !=
867  (cfg.kFeatureSliceCount * cfg.kFeatureSliceSize)) ||
868  (p_tensor->type != kTfLiteInt8)) {
869  LOGE("Bad input tensor parameters in model");
870  return false;
871  }
872  }
873 
874  LOGI("Get Buffer");
875  p_tensor_buffer = p_tensor->data.int8;
876  if (p_tensor_buffer == nullptr) {
877  LOGE("p_tensor_buffer is null");
878  return false;
879  }
880 
881  // setup reader
882  if (cfg.reader!=nullptr){
883  cfg.reader->begin(this);
884  }
885 
886  // all good if we made it here
887  is_setup = true;
888  LOGI("done");
889  return true;
890  }
891 
  /// Constant streaming: we always accept more data.
  virtual int availableToWrite() override { return DEFAULT_BUFFER_SIZE; }
894 
896  virtual size_t write(const uint8_t* audio, size_t bytes) override {
897  TRACED();
898  if (cfg.writer==nullptr){
899  LOGE("cfg.output is null");
900  return 0;
901  }
902  int16_t* samples = (int16_t*)audio;
903  int16_t sample_count = bytes / 2;
904  for (int j = 0; j < sample_count; j++) {
905  cfg.writer->write(samples[j]);
906  }
907  return bytes;
908  }
909 
  /// We can provide audio data only when cfg.reader is defined.
  virtual int available() override { return cfg.reader != nullptr ? DEFAULT_BUFFER_SIZE : 0; }
912 
  /// provide audio data with cfg.reader
  /// Converts the requested byte count into samples and the reader's sample
  /// count back into bytes.
  virtual size_t readBytes(uint8_t *data, size_t len) override {
    TRACED();
    if (cfg.reader!=nullptr){
      return cfg.reader->read((int16_t*)data, (int) len/sizeof(int16_t)) * sizeof(int16_t);
    }else {
      return 0;
    }
  }
922 
  /// Provides the tf lite interpreter.
  tflite::MicroInterpreter& interpreter() override {
    return *p_interpreter;
  }

  /// Provides the TfLiteConfig information.
  TfLiteConfig &config() override {
    return cfg;
  }

  /// Provides access to the model input buffer.
  int8_t* modelInputBuffer() override {
    return p_tensor_buffer;
  }
937 
 protected:
  const tflite::Model* p_model = nullptr;
  tflite::MicroInterpreter* p_interpreter = nullptr;
  TfLiteTensor* p_tensor = nullptr;
  bool is_setup = false;
  TfLiteConfig cfg;
  // Create an area of memory to use for input, output, and intermediate
  // arrays. The size of this will depend on the model you're using, and may
  // need to be determined by experimentation.
  uint8_t* p_tensor_arena = nullptr;
  // points into the model's input tensor (owned by the interpreter)
  int8_t* p_tensor_buffer = nullptr;
949 
950  virtual bool setModel(const unsigned char* model) {
951  TRACED();
952  p_model = tflite::GetModel(model);
953  if (p_model->version() != TFLITE_SCHEMA_VERSION) {
954  LOGE(
955  "Model provided is schema version %d not equal "
956  "to supported version %d.",
957  p_model->version(), TFLITE_SCHEMA_VERSION);
958  return false;
959  }
960  return true;
961  }
962 
963  virtual bool setupWriter() {
964  if (cfg.writer == nullptr) {
965  static TfLiteMicroSpeachWriter writer;
966  cfg.writer = &writer;
967  }
968  return cfg.writer->begin(this);
969  }
970 
  // Pull in only the operation implementations we need.
  // This relies on a complete list of all the ops needed by this graph.
  // An easier approach is to just use the AllOpsResolver, but this will
  // incur some penalty in code space for op implementations that are not
  // needed by this graph.
  //
  /// Creates the interpreter when none was set via setInterpreter().
  /// NOTE(review): the interpreter is a function-local static, so it is
  /// constructed only once - calling begin() again with a different model
  /// keeps using the first interpreter; verify this is intentional.
  virtual bool setupInterpreter() {
    if (p_interpreter == nullptr) {
      TRACEI();
      if (cfg.useAllOpsResolver) {
        tflite::AllOpsResolver resolver;
        static tflite::MicroInterpreter static_interpreter(
            p_model, resolver, p_tensor_arena, cfg.kTensorArenaSize,
            error_reporter);
        p_interpreter = &static_interpreter;
      } else {
        // NOLINTNEXTLINE(runtime-global-variables)
        static tflite::MicroMutableOpResolver<4> micro_op_resolver(
            error_reporter);
        if (micro_op_resolver.AddDepthwiseConv2D() != kTfLiteOk) {
          return false;
        }
        if (micro_op_resolver.AddFullyConnected() != kTfLiteOk) {
          return false;
        }
        if (micro_op_resolver.AddSoftmax() != kTfLiteOk) {
          return false;
        }
        if (micro_op_resolver.AddReshape() != kTfLiteOk) {
          return false;
        }
        // Build an p_interpreter to run the model with.
        static tflite::MicroInterpreter static_interpreter(
            p_model, micro_op_resolver, p_tensor_arena, cfg.kTensorArenaSize,
            error_reporter);
        p_interpreter = &static_interpreter;
      }
    }
    return true;
  }
1011 };
1012 
1013 } // namespace audio_tools
Base class for all Audio Streams. It supports the boolean operator to test if the object is ready with...
Definition: AudioStreams.h:24
virtual int readArray(T data[], int len)
reads multiple values
Definition: Buffers.h:41
virtual int writeArray(const T data[], int len)
Fills the buffer data.
Definition: Buffers.h:65
virtual int availableForWrite()
provides the number of entries that are available to write
Definition: Buffers.h:365
virtual bool write(T data)
write add an entry to the buffer
Definition: Buffers.h:343
Base class for implementing different primitive decoding models on top of the instantaneous results f...
Definition: TfLiteAudioStream.h:212
Error Reporter using the Audio Tools Logger.
Definition: TfLiteAudioStream.h:64
Abstract TfLiteAudioStream to provide access to TfLiteAudioStream for Readers and Writers.
Definition: TfLiteAudioStream.h:403
virtual int8_t * modelInputBuffer()=0
Provides access to the model input buffer.
virtual TfLiteConfig & config()=0
Provides the TfLiteConfig information.
virtual size_t write(const uint8_t *audio, size_t bytes)=0
process the data in batches of max kMaxAudioSampleSize.
TfLiteAudioStream which uses Tensorflow Light to analyze the data. If it is used as a generator (wher...
Definition: TfLiteAudioStream.h:804
virtual size_t readBytes(uint8_t *data, size_t len) override
provide audio data with cfg.input
Definition: TfLiteAudioStream.h:914
virtual bool begin(TfLiteConfig config) override
Start the processing.
Definition: TfLiteAudioStream.h:825
virtual size_t write(const uint8_t *audio, size_t bytes) override
process the data in batches of max kMaxAudioSampleSize.
Definition: TfLiteAudioStream.h:896
tflite::MicroInterpreter & interpreter() override
Provides the tf lite interpreter.
Definition: TfLiteAudioStream.h:924
void setInterpreter(tflite::MicroInterpreter *p_interpreter)
Optionally define your own p_interpreter.
Definition: TfLiteAudioStream.h:813
int8_t * modelInputBuffer() override
Provides access to the model input buffer.
Definition: TfLiteAudioStream.h:934
TfLiteConfig & config() override
Provides the TfLiteConfig information.
Definition: TfLiteAudioStream.h:929
virtual int availableToWrite() override
Constant streaming.
Definition: TfLiteAudioStream.h:893
virtual int available() override
We can provide only some audio data when cfg.input is defined.
Definition: TfLiteAudioStream.h:911
TfLiteMicroSpeachWriter for Audio Data.
Definition: TfLiteAudioStream.h:427
void printFeatures()
For debugging: print feature matrix.
Definition: TfLiteAudioStream.h:616
virtual bool write1(const int16_t sample)
Processes a single sample.
Definition: TfLiteAudioStream.h:522
virtual bool begin(TfLiteAudioStreamBase *parent)
Call begin before starting the processing.
Definition: TfLiteAudioStream.h:437
virtual void respondToCommand(const char *found_command, uint8_t score, bool is_new_command)
Overwrite this method to implement your own handler or provide callback.
Definition: TfLiteAudioStream.h:713
This class is designed to apply a very primitive decoding model on top of the instantaneous results f...
Definition: TfLiteAudioStream.h:233
TfLiteStatus validate(const TfLiteTensor *latest_results)
Checks the input data.
Definition: TfLiteAudioStream.h:365
TfLiteStatus evaluate(const char **found_command, uint8_t *result_score, bool *is_new_command)
Finds the result.
Definition: TfLiteAudioStream.h:319
void deleteOldRecords(int32_t limit)
Removes obsolete records from the queue.
Definition: TfLiteAudioStream.h:312
int categoryCount()
Determines the number of categories.
Definition: TfLiteAudioStream.h:307
bool begin(TfLiteConfig cfg) override
Setup parameters from config.
Definition: TfLiteAudioStream.h:240
int resultCategoryIdx(int8_t *score)
finds the category with the biggest score
Definition: TfLiteAudioStream.h:295
Quantizer that helps to quantize and dequantize between float and int8.
Definition: TfLiteAudioStream.h:179
Input class which provides the next value if the TfLiteAudioStream is treated as an audio source.
Definition: TfLiteAudioStream.h:41
Generate a sine output from a model that was trained on the sine method. (=hello_world)
Definition: TfLiteAudioStream.h:736
Output class which interprets audio data if TfLiteAudioStream is treated as audio sink.
Definition: TfLiteAudioStream.h:53
Vector implementation which provides the most important methods as defined by std::vector....
Definition: Vector.h:21
Generic Implementation of sound input and output for desktop environments using portaudio.
Definition: AnalogAudio.h:10
Configuration settings for TfLiteAudioStream.
Definition: TfLiteAudioStream.h:85
void setCategories(const char *(&array)[N])
Defines the labels.
Definition: TfLiteAudioStream.h:147