@@ -56,6 +56,7 @@ struct TfLiteConfig {
56
56
TfLiteAudioFeatureProvider* featureProvider = nullptr ; // nullptr by default
57
57
const char ** labels = nullptr ; // nullptr by default
58
58
bool useAllOpsResolver = false ; // off by default
59
+ // callback for command handler: function pointer taking (found_command, score, is_new_command); nullptr by default
59
60
void (*respondToCommand)(const char * found_command, uint8_t score,
60
61
bool is_new_command) = nullptr ;
61
62
@@ -86,21 +87,40 @@ struct TfLiteConfig {
86
87
int kFeatureSliceDurationMs = 30 ;
87
88
88
89
// number of new slices to collect before evaluating the model
89
- int kSlicesToProcess = 3 ;
90
+ int kSlicesToProcess = 2 ;
91
+
92
+ // Parameters for RecognizeCommands (the code consuming them is outside this view)
93
+ int32_t average_window_duration_ms = 1000 ; // default 1000; milliseconds, going by the name
94
+ uint8_t detection_threshold = 200 ; // default 200; stored as uint8_t, so at most 255
95
+ int32_t suppression_ms = 1500 ; // default 1500; milliseconds, going by the name
96
+ int32_t minimum_count = 3 ; // default 3
97
+
98
// Input for FrontendConfig. Each value is copied into the frontend config
// member of the matching name (filterbank.*, noise_reduction.*,
// pcan_gain_control.*, log_scale.*) when the frontend is set up.
float filterbank_lower_band_limit = 125.0;
float filterbank_upper_band_limit = 7500.0;
// Bit count: kept integral because it is assigned to the frontend's
// smoothing_bits member; a float here would only be narrowed on assignment.
int noise_reduction_smoothing_bits = 10;
float noise_reduction_even_smoothing = 0.025;
float noise_reduction_odd_smoothing = 0.06;
float noise_reduction_min_signal_remaining = 0.05;
bool pcan_gain_control_enable_pcan = true;
float pcan_gain_control_strength = 0.95;
float pcan_gain_control_offset = 80.0;
// Bit count: kept integral for the same reason as the smoothing bits above.
int pcan_gain_control_gain_bits = 21;
bool log_scale_enable_log = true;
uint8_t log_scale_scale_shift = 6;
111
+
112
+ int featureElementCount () { // total feature elements: slice size multiplied by slice count
113
+ return kFeatureSliceSize * kFeatureSliceCount ;
114
+ }
90
115
91
- int featureElementCount () { return kFeatureSliceSize * kFeatureSliceCount ; }
92
116
int audioSampleSize () {
93
117
return kFeatureSliceDurationMs * (sample_rate / 1000 );
94
118
}
119
+
95
120
int strideSampleSize () {
96
121
return kFeatureSliceStrideMs * (sample_rate / 1000 );
97
122
}
98
123
99
- // Parameters for RecognizeCommands
100
- int32_t average_window_duration_ms = 1000 ;
101
- uint8_t detection_threshold = 200 ;
102
- int32_t suppression_ms = 1500 ;
103
- int32_t minimum_count = 3 ;
104
124
};
105
125
106
126
// Partial implementation of std::deque, just providing the functionality
@@ -512,20 +532,19 @@ class TfLiteAudioFeatureProvider {
512
532
LOGD (LOG_METHOD);
513
533
config.window .size_ms = cfg.kFeatureSliceDurationMs ;
514
534
config.window .step_size_ms = cfg.kFeatureSliceStrideMs ;
515
- config.noise_reduction .smoothing_bits = 10 ;
516
535
config.filterbank .num_channels = cfg.kFeatureSliceSize ;
517
- config.filterbank .lower_band_limit = 125.0 ;
518
- config.filterbank .upper_band_limit = 7500.0 ;
519
- config.noise_reduction .smoothing_bits = 10 ;
520
- config.noise_reduction .even_smoothing = 0.025 ;
521
- config.noise_reduction .odd_smoothing = 0.06 ;
522
- config.noise_reduction .min_signal_remaining = 0.05 ;
523
- config.pcan_gain_control .enable_pcan = 1 ;
524
- config.pcan_gain_control .strength = 0.95 ;
525
- config.pcan_gain_control .offset = 80.0 ;
526
- config.pcan_gain_control .gain_bits = 21 ;
527
- config.log_scale .enable_log = 1 ;
528
- config.log_scale .scale_shift = 6 ;
536
+ config.filterbank .lower_band_limit = cfg. filterbank_lower_band_limit ;
537
+ config.filterbank .upper_band_limit = cfg. filterbank_upper_band_limit ;
538
+ config.noise_reduction .smoothing_bits = cfg. noise_reduction_smoothing_bits ;
539
+ config.noise_reduction .even_smoothing = cfg. noise_reduction_even_smoothing ;
540
+ config.noise_reduction .odd_smoothing = cfg. noise_reduction_odd_smoothing ;
541
+ config.noise_reduction .min_signal_remaining = cfg. noise_reduction_min_signal_remaining ;
542
+ config.pcan_gain_control .enable_pcan = cfg. pcan_gain_control_enable_pcan ;
543
+ config.pcan_gain_control .strength = cfg. pcan_gain_control_strength ;
544
+ config.pcan_gain_control .offset = cfg. pcan_gain_control_offset ;
545
+ config.pcan_gain_control .gain_bits = cfg. pcan_gain_control_gain_bits ;
546
+ config.log_scale .enable_log = cfg. log_scale_enable_log ;
547
+ config.log_scale .scale_shift = cfg. log_scale_scale_shift ;
529
548
if (!FrontendPopulateState (&config, &g_micro_features_state,
530
549
cfg.sample_rate )) {
531
550
LOGE (" frontendPopulateState() failed" );
@@ -715,6 +734,7 @@ class TfLiteAudioOutput : public AudioPrint {
715
734
current_time += cfg.kFeatureSliceStrideMs ;
716
735
// determine slice
717
736
total_slice_count++;
737
+
718
738
int8_t * feature_buffer = feature_provider->addSlice ();
719
739
if (total_slice_count >= cfg.kSlicesToProcess ) {
720
740
processSlices (feature_buffer);
0 commit comments