Skip to content

Commit 50eea00

Browse files
committed
Delay
1 parent 33148a9 commit 50eea00

File tree

3 files changed

+91
-53
lines changed

3 files changed

+91
-53
lines changed

src/AudioEffects/AudioEffect.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ class Delay : public AudioEffect {
331331

332332
void updateBufferSize(){
333333
uint16_t newSampleCount = sampleRate * p_ms / 1000;
334-
if (newSampleCount>sampleCount){
334+
if (newSampleCount!=sampleCount){
335335
if (p_history!=nullptr) delete p_history;
336336
sampleCount = newSampleCount;
337337
p_history = new RingBuffer<effect_t>(sampleCount);

src/AudioLibs/TfLiteAudioOutput.h

Lines changed: 77 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#pragma once
22

3+
// Configure FFT to output 16 bit fixed point.
4+
#define FIXED_POINT 16
5+
36
#include <TensorFlowLite.h>
47

58
#include <cmath>
@@ -73,7 +76,7 @@ struct TfLiteConfig {
7376
// the frequency information. This has to be a power of two, and since
7477
// we're dealing with 30ms of 16KHz inputs, which means 480 samples, this
7578
// is the next value.
76-
int kMaxAudioSampleSize = 480;
79+
//int kMaxAudioSampleSize = 320; //512; // 480
7780
int kAudioSampleFrequency = 16000;
7881

7982
// Number of audio channels - is usually 1. If 2 we reduce it to 1 by averaging the 2 channels
@@ -90,6 +93,8 @@ struct TfLiteConfig {
9093
int kSlicesToProcess = 3;
9194

9295
/// Total number of feature values: kFeatureSliceSize values for each of the
/// kFeatureSliceCount slices.
int featureElementCount() {
  const auto valuesPerSlice = kFeatureSliceSize;
  const auto sliceCount = kFeatureSliceCount;
  return valuesPerSlice * sliceCount;
}
96+
/// Number of audio samples covered by one feature slice of
/// kFeatureSliceDurationMs milliseconds at kAudioSampleFrequency Hz.
/// The multiplication is done first (in 32 bits) and the division by 1000 last,
/// so sample rates that are not a multiple of 1000 Hz (e.g. 44100) are not
/// truncated to whole kHz before scaling.
int audioSampleSize() {
  const int32_t samples = static_cast<int32_t>(kFeatureSliceDurationMs) * kAudioSampleFrequency / 1000;
  return static_cast<int>(samples);
}
97+
/// Number of audio samples covered by one stride of kFeatureSliceStrideMs
/// milliseconds at kAudioSampleFrequency Hz.
/// The multiplication is done first (in 32 bits) and the division by 1000 last,
/// so sample rates that are not a multiple of 1000 Hz (e.g. 44100) are not
/// truncated to whole kHz before scaling.
int strideSampleSize() {
  const int32_t samples = static_cast<int32_t>(kFeatureSliceStrideMs) * kAudioSampleFrequency / 1000;
  return static_cast<int>(samples);
}
9398

9499
// Parameters for RecognizeCommands
95100
int32_t average_window_duration_ms = 1000;
@@ -190,7 +195,7 @@ class TfLiteResultsQueue {
190195
template <int N>
191196
class TfLiteAbstractRecognizeCommands {
192197
public:
193-
virtual TfLiteStatus ProcessLatestResults(const TfLiteTensor* latest_results,
198+
virtual TfLiteStatus processLatestResults(const TfLiteTensor* latest_results,
194199
const int32_t current_time_ms,
195200
const char** found_command, uint8_t* score,
196201
bool* is_new_command) = 0;
@@ -225,32 +230,41 @@ class TfLiteRecognizeCommands : public TfLiteAbstractRecognizeCommands<N> {
225230
// further recognitions for a set time after one has been triggered, which can
226231
// help reduce spurious recognitions.
227232

228-
explicit TfLiteRecognizeCommands() {
233+
TfLiteRecognizeCommands() {
229234
previous_top_label_ = "silence";
230235
previous_top_label_time_ = std::numeric_limits<int32_t>::min();
231236
kCategoryCount = N;
232237
}
233238

234239
/// Setup parameters from config
235240
bool begin(TfLiteConfig cfg) override {
241+
if (kCategoryCount==0){
242+
LOGE("kCategoryCount must not be 0");
243+
return false;
244+
}
245+
if (cfg.labels==nullptr){
246+
LOGE("config.labels not defined");
247+
return false;
248+
}
236249
average_window_duration_ms_ = cfg.average_window_duration_ms;
237250
detection_threshold_ = cfg.detection_threshold;
238251
suppression_ms_ = cfg.suppression_ms;
239252
minimum_count_ = cfg.minimum_count;
240253
kCategoryLabels = cfg.labels;
241-
if (cfg.labels==0){
242-
LOGW("config.labels not defined");
243-
return false;
244-
}
254+
started = true;
245255
return true;
246256
}
247257

248258
// Call this with the results of running a model on sample data.
249-
virtual TfLiteStatus ProcessLatestResults(const TfLiteTensor* latest_results,
259+
virtual TfLiteStatus processLatestResults(const TfLiteTensor* latest_results,
250260
const int32_t current_time_ms,
251261
const char** found_command, uint8_t* score,
252262
bool* is_new_command) override {
253263
LOGD(LOG_METHOD);
264+
if (!started){
265+
LOGE("TfLiteRecognizeCommands not started");
266+
return kTfLiteError;
267+
}
254268
if ((latest_results->dims->size != 2) ||
255269
(latest_results->dims->data[0] != 1) ||
256270
(latest_results->dims->data[1] != kCategoryCount)) {
@@ -359,6 +373,7 @@ class TfLiteRecognizeCommands : public TfLiteAbstractRecognizeCommands<N> {
359373
int32_t minimum_count_;
360374
int kCategoryCount;
361375
const char** kCategoryLabels = nullptr;
376+
bool started = false;
362377

363378
// Working variables
364379
TfLiteResultsQueue<N> previous_results_;
@@ -385,13 +400,20 @@ class TfLiteAudioFeatureProvider {
385400
virtual bool begin(TfLiteConfig config) {
386401
LOGD(LOG_METHOD);
387402
cfg = config;
403+
kMaxAudioSampleSize = cfg.audioSampleSize();
404+
kStrideSampleSize = cfg.strideSampleSize();
405+
kKeepSampleSize = kMaxAudioSampleSize - kStrideSampleSize;
406+
407+
// Allocate ring buffer
388408
if (p_buffer == nullptr) {
389-
p_buffer = new audio_tools::RingBuffer<int16_t>(cfg.kMaxAudioSampleSize);
390-
LOGD("Allocating buffer for %d samples", cfg.kMaxAudioSampleSize);
409+
p_buffer = new audio_tools::RingBuffer<int16_t>(kMaxAudioSampleSize);
410+
LOGD("Allocating buffer for %d samples", kMaxAudioSampleSize);
391411
}
412+
392413
// Initialize the feature data to default values.
393414
if (feature_data_ == nullptr) {
394-
feature_data_ = new int8_t[cfg.featureElementCount()]{}; // initialzed array
415+
feature_data_ = new int8_t[cfg.featureElementCount()];
416+
memset(feature_data_,0, cfg.featureElementCount());
395417
}
396418

397419
TfLiteStatus init_status = initializeMicroFeatures();
@@ -428,15 +450,13 @@ class TfLiteAudioFeatureProvider {
428450

429451
protected:
430452
TfLiteConfig cfg;
431-
// int feature_size_;
432453
int8_t* feature_data_ = nullptr;
433-
// Make sure we don't try to use cached information if this is the first
434-
// call into the provider.
435-
bool is_first_run_ = true;
436-
bool g_is_first_time = true;
437-
// const char** kCategoryLabels;
438454
audio_tools::RingBuffer<int16_t>* p_buffer = nullptr;
439455
FrontendState g_micro_features_state;
456+
FrontendConfig config;
457+
int kMaxAudioSampleSize;
458+
int kStrideSampleSize;
459+
int kKeepSampleSize;
440460

441461
// If we can avoid recalculating some slices, just move the existing
442462
// data up in the spectrogram, to perform something like this: last time
@@ -452,26 +472,32 @@ class TfLiteAudioFeatureProvider {
452472
// +-----------+ +-----------+
453473
virtual void addSlice() {
454474
LOGD(LOG_METHOD);
475+
// shift feature_data_ by one slice
455476
memmove(feature_data_, feature_data_ + cfg.kFeatureSliceSize,
456477
(cfg.kFeatureSliceCount - 1) * cfg.kFeatureSliceSize);
457478

458479
// copy data from buffer to audio_samples
459-
int16_t audio_samples[cfg.kMaxAudioSampleSize];
460-
int audio_samples_size =
461-
p_buffer->readArray(audio_samples, cfg.kMaxAudioSampleSize);
480+
int16_t audio_samples[kMaxAudioSampleSize];
481+
int audio_samples_size = p_buffer->readArray(audio_samples, kMaxAudioSampleSize);
482+
483+
// check size
484+
if (audio_samples_size!=kMaxAudioSampleSize){
485+
LOGE("audio_samples_size=%d != kMaxAudioSampleSize=%d",audio_samples_size, kMaxAudioSampleSize);
486+
}
462487

488+
// keep some data to be reprocessed - move by kStrideSampleSize
489+
p_buffer->writeArray(audio_samples+kStrideSampleSize, kKeepSampleSize);
463490

464491
// the new slice data will always be stored at the end
465-
int8_t* new_slice_data =
466-
feature_data_ + ((cfg.kFeatureSliceCount - 1) * cfg.kFeatureSliceSize);
467-
size_t num_samples_read = audio_samples_size;
492+
int8_t* new_slice_data = feature_data_ + ((cfg.kFeatureSliceCount - 1) * cfg.kFeatureSliceSize);
493+
size_t num_samples_read = 0;
468494
if (generateMicroFeatures(audio_samples, audio_samples_size,
469-
cfg.kFeatureSliceSize, new_slice_data,
495+
new_slice_data, cfg.kFeatureSliceSize,
470496
&num_samples_read) != kTfLiteOk) {
471497
LOGE("Error generateMicroFeatures");
472498
}
473499

474-
// printFeatures();
500+
//printFeatures();
475501
}
476502

477503
/// For debugging: print feature matrix
@@ -483,11 +509,11 @@ class TfLiteAudioFeatureProvider {
483509
}
484510
Serial.println();
485511
}
512+
Serial.println("------------");
486513
}
487514

488515
virtual TfLiteStatus initializeMicroFeatures() {
489516
LOGD(LOG_METHOD);
490-
FrontendConfig config;
491517
config.window.size_ms = cfg.kFeatureSliceDurationMs;
492518
config.window.step_size_ms = cfg.kFeatureSliceStrideMs;
493519
config.noise_reduction.smoothing_bits = 10;
@@ -506,38 +532,42 @@ class TfLiteAudioFeatureProvider {
506532
config.log_scale.scale_shift = 6;
507533
if (!FrontendPopulateState(&config, &g_micro_features_state,
508534
cfg.kAudioSampleFrequency)) {
509-
LOGE("FrontendPopulateState() failed");
535+
LOGE("frontendPopulateState() failed");
510536
return kTfLiteError;
511537
}
512-
g_is_first_time = true;
513538
return kTfLiteOk;
514539
}
515540

516-
// This is not exposed in any header, and is only used for testing, to ensure
517-
// that the state is correctly set up before generating results.
518-
void setMicroFeaturesNoiseEstimates(const uint32_t* estimate_presets) {
519-
LOGD(LOG_METHOD);
520-
for (int i = 0; i < g_micro_features_state.filterbank.num_channels; ++i) {
521-
g_micro_features_state.noise_reduction.estimate[i] = estimate_presets[i];
522-
}
523-
}
541+
// // This is not exposed in any header, and is only used for testing, to ensure
542+
// // that the state is correctly set up before generating results.
543+
// void setMicroFeaturesNoiseEstimates(const uint32_t* estimate_presets) {
544+
// LOGD(LOG_METHOD);
545+
// for (int i = 0; i < g_micro_features_state.filterbank.num_channels; ++i) {
546+
// g_micro_features_state.noise_reduction.estimate[i] = estimate_presets[i];
547+
// }
548+
// }
524549

525550
virtual TfLiteStatus generateMicroFeatures(const int16_t* input, int input_size,
526-
int output_size, int8_t* output,
551+
int8_t* output, int output_size,
527552
size_t* num_samples_read) {
528553
LOGD(LOG_METHOD);
529-
const int16_t* frontend_input;
530-
if (g_is_first_time) {
531-
frontend_input = input;
532-
g_is_first_time = false;
533-
} else {
534-
frontend_input = input;
535-
}
554+
const int16_t* frontend_input=input;
536555

537556
// Apply FFT
538557
FrontendOutput frontend_output = FrontendProcessSamples(
539558
&g_micro_features_state, frontend_input, input_size, num_samples_read);
540559

560+
// Check size
561+
if (output_size != frontend_output.size){
562+
LOGE("output_size=%d, frontend_output.size=%d",output_size, frontend_output.size);
563+
}
564+
565+
// // check generated features
566+
// if (input_size != *num_samples_read){
567+
// LOGE("audio_samples_size=%d vs num_samples_read=%d", input_size, *num_samples_read);
568+
// }
569+
570+
541571
for (size_t i = 0; i < frontend_output.size; ++i) {
542572
// These scaling values are derived from those used in input_data.py in
543573
// the training pipeline. The feature pipeline outputs 16-bit signed
@@ -675,7 +705,7 @@ class TfLiteAudioOutput : public AudioPrint {
675705
// we submit int16 data which will be reduced to 8bits so we can send
676706
// double the amount - 2 channels will be reduced to 1 so we multiply by
677707
// number of channels
678-
int maxBytes = cfg.kMaxAudioSampleSize * 2 * cfg.kAudioChannels;
708+
int maxBytes = cfg.audioSampleSize() * 2 * cfg.kAudioChannels;
679709
while (open > 0) {
680710
int len = min(open, maxBytes);
681711
result += processAudio(audio + pos, len);
@@ -810,10 +840,10 @@ class TfLiteAudioOutput : public AudioPrint {
810840
uint8_t score = 0;
811841
bool is_new_command = false;
812842

813-
TfLiteStatus process_status = recognizer->ProcessLatestResults(
843+
TfLiteStatus process_status = recognizer->processLatestResults(
814844
output, current_time, &found_command, &score, &is_new_command);
815845
if (process_status != kTfLiteOk) {
816-
LOGE("TfLiteRecognizeCommands::ProcessLatestResults() failed");
846+
LOGE("TfLiteRecognizeCommands::processLatestResults() failed");
817847
return 0;
818848
}
819849
// Do something based on the recognized command. The default

src/AudioTools/Buffers.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -234,8 +234,7 @@ template<typename T>
234234
class RingBuffer : public BaseBuffer<T> {
235235
public:
236236
RingBuffer(int size){
237-
this->max_size = size;
238-
_aucBuffer = new T[max_size];
237+
resize(size);
239238
reset();
240239
}
241240

@@ -247,7 +246,7 @@ class RingBuffer : public BaseBuffer<T> {
247246
if (isEmpty())
248247
return -1;
249248

250-
uint8_t value = _aucBuffer[_iTail];
249+
T value = _aucBuffer[_iTail];
251250
_iTail = nextIndex(_iTail);
252251
_numElems--;
253252

@@ -304,14 +303,23 @@ class RingBuffer : public BaseBuffer<T> {
304303
virtual T* address() {
305304
return _aucBuffer;
306305
}
306+
307+
virtual void resize(int len){
308+
if (_aucBuffer!=nullptr){
309+
delete []_aucBuffer;
310+
}
311+
this->max_size = len;
312+
_aucBuffer = new T[max_size];
313+
reset();
314+
}
307315

308316

309317
protected:
310-
T *_aucBuffer ;
318+
T *_aucBuffer=nullptr;
311319
int _iHead ;
312320
int _iTail ;
313321
int _numElems;
314-
int max_size;
322+
int max_size=0;
315323

316324
int nextIndex(int index){
317325
return (uint32_t)(index + 1) % max_size;

0 commit comments

Comments
 (0)