From 82f3cfeeaf6c0c174401e4b9a3d8b71e7b131f39 Mon Sep 17 00:00:00 2001 From: Vlad Turcuman Date: Thu, 15 Apr 2021 16:39:49 +0300 Subject: [PATCH 1/5] Add MicroBitSoundRecogniser and changed MicroBitAudioProcessor Changed the MicroBitAudioProcessor to also be a DataSource. Added a Hann Window and Harmonic Product Spectrum before and after fft to get more accurate results for square wave detection. Added MicroBitSoundRecogniser as a DataSink, which recognises given patterns in the frequency data. It connects to the audio processor to get the frequency analysis. The constructor is protected such that the class becomes abstract. EmojiRecogniser inherits it and implements the constructor - adds the happy sound as the sound to be recognised. To recognise the happy sound the following main.cpp can be used: #include "MicroBitSerial.h" #include "MicroBit.h" #include "CodalDmesg.h" #include "MicroBitAudioProcessor.h" #include "EmojiRecogniser.h" #include "StreamNormalizer.h" #include "Tests.h" #include "LevelDetector.h" #include "StreamSplitter.h" static NRF52ADCChannel *mic = NULL; static StreamNormalizer *processor = NULL; static MicroBitAudioProcessor *fft = NULL; static LevelDetector *level = NULL; static StreamSplitter *splitter = NULL; MicroBitSoundRecogniser *recogniser = NULL; MicroBit uBit; void onSound(ManagedString sound) { recogniser->stopAnalisying(); uBit.display.scroll(sound); recogniser->startAnalisying(onSound); } int main() { uBit.init(); NRF52ADCChannel *mic = uBit.adc.getChannel(uBit.io.microphone); mic->setGain(7,0); uBit.io.runmic.setDigitalValue(1); uBit.io.runmic.setHighDrive(true); StreamNormalizer *processor = new StreamNormalizer(mic->output, 1.2f, true, DATASTREAM_FORMAT_8BIT_SIGNED, 10); StreamSplitter *splitter = new StreamSplitter(processor->output); fft = new MicroBitAudioProcessor(*splitter); recogniser = new EmojiRecogniser(*fft, uBit); recogniser->startAnalisying(onSound); while(1){ uBit.display.print("-"); uBit.sleep(1000); } } --- inc/EmojiRecogniser.h | 16 +++ inc/MicroBitAudioProcessor.h | 47 ++++++- inc/MicroBitSoundRecogniser.h | 91 +++++++++++++ model/MicroBit.cpp | 2 +- model/MicroBit.h | 2 +- source/EmojiRecogniser.cpp | 72 ++++++++++ source/MicroBitAudioProcessor.cpp | 146 +++++++++++++++----- source/MicroBitSoundRecogniser.cpp | 212 +++++++++++++++++++++++++++++ 8 files changed, 546 insertions(+), 42 deletions(-) create mode 100644 inc/EmojiRecogniser.h create mode 100644 inc/MicroBitSoundRecogniser.h create mode 100644 source/EmojiRecogniser.cpp create mode 100644 source/MicroBitSoundRecogniser.cpp diff --git a/inc/EmojiRecogniser.h b/inc/EmojiRecogniser.h new file mode 100644 index 00000000..36a227a9 --- /dev/null +++ b/inc/EmojiRecogniser.h @@ -0,0 +1,16 @@ + +#ifndef EMOJI_RECOGNISER_H +#define EMOJI_RECOGNISER_H + +#include "MicroBitSoundRecogniser.h" + +class EmojiRecogniser : public MicroBitSoundRecogniser +{ + void addHappySound(); + + public: + EmojiRecogniser(MicroBitAudioProcessor& processor, + MicroBit& uBit); +}; + +#endif \ No newline at end of file diff --git a/inc/MicroBitAudioProcessor.h b/inc/MicroBitAudioProcessor.h index 761a5a5b..53bebbf8 100644 --- a/inc/MicroBitAudioProcessor.h +++ b/inc/MicroBitAudioProcessor.h @@ -26,12 +26,35 @@ DEALINGS IN THE SOFTWARE. 
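/*
 * A minimal standalone sketch of the two FFT-related steps the commit message
 * describes (a Hann window before the FFT, a Harmonic Product Spectrum on the
 * magnitudes after it), which this hunk enables via the HANN_WINDOW and
 * HARMONIC_PRODUCT_SPECTRUM flags. The names N, samples and mag below are
 * illustrative only and not part of the patch; the real implementation lives
 * in MicroBitAudioProcessor.cpp.
 *
 *   const int N = AUDIO_SAMPLES_NUMBER;
 *
 *   // Hann window: taper the time-domain frame to reduce spectral leakage,
 *   // sharpening the peaks produced by the square-wave emoji tones.
 *   for (int i = 0; i < N; i++)
 *       samples[i] *= 0.5f * (1.0f - arm_cos_f32(2.0f * PI * i / N));
 *
 *   // Harmonic Product Spectrum: multiply the magnitude spectrum by
 *   // downsampled copies of itself so the fundamental frequency outranks
 *   // its harmonics when the maximum bin is picked.
 *   for (int h = 2; h <= 3; h++)
 *       for (int i = 0; i < N / (2 * h); i++)
 *           mag[i] *= mag[i * h];
 */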
#ifndef MICROBIT_AUDIO_PROCESSOR_H #define MICROBIT_AUDIO_PROCESSOR_H -#define MIC_SAMPLE_RATE (11 * 1024) -#define AUDIO_SAMPLES_NUMBER 1024 +#define MIC_SAMPLE_RATE (1000000 / MIC_SAMPLE_DELTA) +#define AUDIO_SAMPLES_NUMBER 512 +#define HISTORY_LENGTH 50 -class MicroBitAudioProcessor : public DataSink +#define RECOGNITION_START_FREQ 1400 +#define RECOGNITION_END_FREQ 4500 + +#define ANALYSIS_STD_MULT_THRESHOLD 3 +#define ANALYSIS_STD_THRESHOLD 75 +#define ANALYSIS_MEAN_THRESHOLD 0 + +// #define SQUARE_BEFORE_ANALYSIS +#define HARMONIC_PRODUCT_SPECTRUM +#define HANN_WINDOW + + +class MicroBitAudioProcessor : public DataSink, public DataSource { - DataSource &audiostream; +public: + + struct AudioFrameAnalysis { + uint8_t size; + uint16_t buf[3]; + }; + +private: + + DataSource &audiostream; + DataSink *recogniser; int zeroOffset; // unsigned value that is the best effort guess of the zero point of the data source int divisor; // Used for LINEAR modes arm_rfft_fast_instance_f32 fft_instance; @@ -40,13 +63,29 @@ class MicroBitAudioProcessor : public DataSink float *mag; uint16_t position; bool recording; + +#ifdef HANN_WINDOW + float32_t hann_window[AUDIO_SAMPLES_NUMBER]; +#endif + + AudioFrameAnalysis out_buffer[HISTORY_LENGTH * 2]; + uint16_t out_buffer_len; + bool consumed_buffer; + + // What is this used for? Couldn't find any references to it float rec[AUDIO_SAMPLES_NUMBER * 2]; int lastFreq; + uint16_t frequencyToIndex(int freq); + float32_t indexToFrequency(int index); + void sendAnalysis(uint16_t* freq, uint8_t size); + public: MicroBitAudioProcessor(DataSource& source); ~MicroBitAudioProcessor(); virtual int pullRequest(); + void connect(DataSink *downstream); + virtual ManagedBuffer pull(); int getFrequency(); int setDivisor(int d); void startRecording(); diff --git a/inc/MicroBitSoundRecogniser.h b/inc/MicroBitSoundRecogniser.h new file mode 100644 index 00000000..3be3cc6b --- /dev/null +++ b/inc/MicroBitSoundRecogniser.h @@ -0,0 +1,91 @@ + +#ifndef MICROBIT_SOUND_RECOGNISER_H +#define MICROBIT_SOUND_RECOGNISER_H + +#include "MicroBit.h" +#include "DataStream.h" +#include "MicroBitAudioProcessor.h" +#include "arm_math.h" + +// Should be the minimum number of elements needed in the buffer +// at any time +#define HISTORY_LENGTH 50 + +class MicroBitSoundRecogniser : public DataSink +{ + private: + MicroBitAudioProcessor& audio_proceesor; + MicroBit& uBit; + + bool analysing; + + void (*callback)(ManagedString) = NULL; + + MicroBitAudioProcessor::AudioFrameAnalysis buffer[2 * HISTORY_LENGTH]; + uint8_t buffer_len; + + protected: + struct SoundSample { + SoundSample(const uint16_t* _frames, uint8_t size); + ~SoundSample(); + + uint8_t size; + uint16_t* frames; + }; + + struct SoundSequence { + SoundSequence(uint8_t size, uint32_t threshold, uint8_t deviation); + ~SoundSequence(); + + uint8_t size; + uint32_t threshold; + uint8_t deviation; + SoundSample** samples; + }; + + struct Sound { + Sound(uint8_t size, uint8_t max_zeros, uint8_t max_history_len); + ~Sound(); + + uint8_t max_zeros; + uint8_t size; + SoundSequence** sequences; + + void update( MicroBitAudioProcessor::AudioFrameAnalysis* buffer, + uint8_t buffer_len ); + bool matched(); + void resetHistory(); + + private: + + uint8_t matchSequence( uint8_t seq_id, + MicroBitAudioProcessor::AudioFrameAnalysis* buffer, + uint8_t buffer_len) const; + + uint8_t getZeros(uint8_t frames_ago, uint8_t seq_id) const; + void addToHistory(uint8_t seq_id, uint8_t value); + void endHistoryFrame(); + + uint8_t* history; + uint8_t history_len; 
+ uint8_t max_history_len; + }; + + MicroBitSoundRecogniser(MicroBitAudioProcessor& processor, MicroBit& uBit); + + Sound** sounds; + ManagedString** sounds_names; + uint8_t sounds_size; + + public: + ~MicroBitSoundRecogniser(); + + virtual int pullRequest(); + + MicroBitAudioProcessor* getAudioProcessor(); + void setCallback(void (*_callback)(ManagedString)); + void startAnalisying(void (*_callback)(ManagedString)); + void stopAnalisying(); +}; + +#endif diff --git a/model/MicroBit.cpp b/model/MicroBit.cpp index f8ceb8a7..f1794e20 100644 --- a/model/MicroBit.cpp +++ b/model/MicroBit.cpp @@ -60,7 +60,7 @@ MicroBit::MicroBit() : capTouchTimer(NRF_TIMER3, TIMER3_IRQn), timer(systemTimer), messageBus(), - adc(adcTimer, 91), + adc(adcTimer, MIC_SAMPLE_DELTA), touchSensor(capTouchTimer), io(adc, touchSensor), serial(io.usbTx, io.usbRx, NRF_UARTE0), diff --git a/model/MicroBit.h b/model/MicroBit.h index d47f5ed2..71ec1895 100644 --- a/model/MicroBit.h +++ b/model/MicroBit.h @@ -87,7 +87,7 @@ DEALINGS IN THE SOFTWARE. //#include "MicroBitLightSensor.h" - +#define MIC_SAMPLE_DELTA 91 // Status flag values #define DEVICE_INITIALIZED 0x01 diff --git a/source/EmojiRecogniser.cpp b/source/EmojiRecogniser.cpp new file mode 100644 index 00000000..5cb4af1a --- /dev/null +++ b/source/EmojiRecogniser.cpp @@ -0,0 +1,72 @@ + +#include "EmojiRecogniser.h" + + +EmojiRecogniser::EmojiRecogniser( MicroBitAudioProcessor& processor, + MicroBit& uBit) + : MicroBitSoundRecogniser(processor, uBit){ + sounds = new Sound* [7]; + sounds_names = new ManagedString* [7]; + addHappySound(); +} + +void EmojiRecogniser::addHappySound() { + + uint16_t happy[3][6][8] = { + // First sequence + { + { 4, 4368, 0, 0, 2751}, + { 5, 4431, 0, 0, 0, 2751}, + { 6, 4473, 0, 0, 0, 0, 2751}, + { 5, 4263, 0, 0, 0, 2751}, + { 6, 4263, 0, 0, 0, 0, 2751}, + { 4, 4263, 0, 0, 2751} + }, + // Second sequence + { + { 5, 3024, 3024, 3276, 3276, 3255}, + { 6, 3024, 3024, 3717, 3276, 3276, 3255}, + { 5, 2751, 3003, 3276, 3276, 3276} + }, + // Third sequence + { + { 6, 3423, 3276, 3276, 3024, 3024, 3024}, + { 6, 3423, 3276, 3276, 3255, 3024, 3024}, + { 6, 3423, 3276, 3276, 3024, 3024, 2961}, + { 6, 3276, 3234, 3234, 3276, 3003, 3003} + } + }; + + uint16_t happy_thresholds[3] = { + 50 * 50, + 50 * 100, + 50 * 100 + }; + + uint8_t happy_deviations[3] = { + 0, + 1, + 1 + }; + + uint8_t happy_samples[3] = { + 6, + 3, + 4 + }; + + uint8_t it = sounds_size; + sounds_size ++; + sounds_names[it] = new ManagedString("happy"); + + sounds[it] = new Sound(2, 3, 10); + + for(uint8_t i = 1; i < 3 ; i++){ + sounds[it] -> sequences[i -1] = new SoundSequence(happy_samples[i], happy_thresholds[i], happy_deviations[i]); + for(uint8_t j = 0; j < happy_samples[i]; j ++) + sounds[it] -> sequences[i-1] -> samples[j] = new SoundSample(happy[i][j] + 1, happy[i][j][0]); + } + +} + + diff --git a/source/MicroBitAudioProcessor.cpp b/source/MicroBitAudioProcessor.cpp index f2f05f4d..1b7302e9 100644 --- a/source/MicroBitAudioProcessor.cpp +++ b/source/MicroBitAudioProcessor.cpp @@ -21,17 +21,28 @@ DEALINGS IN THE SOFTWARE. 
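/*
 * Worked numbers for the frequencyToIndex()/indexToFrequency() helpers added
 * in this file, using the constants the patch sets elsewhere
 * (MIC_SAMPLE_DELTA = 91, AUDIO_SAMPLES_NUMBER = 512):
 *
 *   MIC_SAMPLE_RATE = 1000000 / 91 = 10989 Hz   (integer division)
 *   FFT bin width   = 10989 / 512  = 21 Hz      (integer division)
 *   frequencyToIndex(1400) = 1400 / 21 = 66     (1400 is RECOGNITION_START_FREQ)
 *   indexToFrequency(66)   = 21 * 66   = 1386 Hz
 *
 * The 21 Hz bin width is why every frequency recorded in the emoji samples
 * in EmojiRecogniser.cpp is a multiple of 21, e.g. 2751 = 131 * 21.
 */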
#include "MicroBit.h" #include "MicroBitAudioProcessor.h" -MicroBitAudioProcessor::MicroBitAudioProcessor(DataSource& source) : audiostream(source) -{ + + +MicroBitAudioProcessor::MicroBitAudioProcessor(DataSource& source) + : audiostream(source), recogniser(NULL) { divisor = 1; lastFreq = 0; arm_rfft_fast_init_f32(&fft_instance, AUDIO_SAMPLES_NUMBER); /* Double Buffering: We allocate twice the number of samples*/ + // the C++ way for doing this is (as far as I know) + // new float32_t[AUDIO_SAMPLES_NUMBER * 2]; buf = (float *)malloc(sizeof(float) * AUDIO_SAMPLES_NUMBER * 2); output = (float *)malloc(sizeof(float) * AUDIO_SAMPLES_NUMBER); mag = (float *)malloc(sizeof(float) * AUDIO_SAMPLES_NUMBER / 2); + + memset(buf, 0, sizeof(buf)); + +#ifdef HANN_WINDOW + for(int i=0; i < AUDIO_SAMPLES_NUMBER; i++) + hann_window[i] = 0.5 * (1 - arm_cos_f32(2 * 3.14159265 * i / AUDIO_SAMPLES_NUMBER)); +#endif position = 0; recording = false; @@ -51,6 +62,48 @@ MicroBitAudioProcessor::~MicroBitAudioProcessor() free(mag); } +uint16_t MicroBitAudioProcessor::frequencyToIndex(int freq) { + return (freq / ((uint32_t)MIC_SAMPLE_RATE / AUDIO_SAMPLES_NUMBER)); +} + +float32_t MicroBitAudioProcessor::indexToFrequency(int index) { + return ((uint32_t)MIC_SAMPLE_RATE / AUDIO_SAMPLES_NUMBER) * index; +} + +void MicroBitAudioProcessor::sendAnalysis(uint16_t* freq, uint8_t size) { + + if(consumed_buffer) + out_buffer_len = 0; + + if(out_buffer_len == 2 * HISTORY_LENGTH){ + memcpy(out_buffer, &out_buffer[HISTORY_LENGTH], (sizeof(AudioFrameAnalysis) * HISTORY_LENGTH)); + out_buffer_len = HISTORY_LENGTH; + } + + out_buffer[out_buffer_len].size = min(size, 3); + if(size > 2) + out_buffer[out_buffer_len].buf[2] = freq[2]; + if(size > 1) + out_buffer[out_buffer_len].buf[1] = freq[1]; + if(size > 0) + out_buffer[out_buffer_len].buf[0] = freq[0]; + out_buffer_len ++; + + if(recogniser) + recogniser -> pullRequest(); +} + +void MicroBitAudioProcessor::connect(DataSink *downstream){ + recogniser = downstream; +} + +ManagedBuffer MicroBitAudioProcessor::pull() +{ + consumed_buffer = true; + return ManagedBuffer((uint8_t*) out_buffer, sizeof(AudioFrameAnalysis) * out_buffer_len); +} + + int MicroBitAudioProcessor::pullRequest() { @@ -62,16 +115,11 @@ int MicroBitAudioProcessor::pullRequest() if (!recording) return DEVICE_OK; - //using 8 bits produces more accurate to input results (not 2x like using 16) but issue with - //F and G both producing 363hz? - - // A 440 matches perfectly, but the rest of the notes dont? - //int8_t *data = (int8_t *) &mic_samples[0]; - - //Legacy Version - int16_t *data = (int16_t *) &mic_samples[0]; + // Should be int8_t as this is the type the microphone + // records it + int8_t *data = (int8_t *) &mic_samples[0]; - int samples = mic_samples.length() / 2; + int samples = mic_samples.length(); for (int i=0; i < samples; i++) { @@ -86,31 +134,57 @@ int MicroBitAudioProcessor::pullRequest() if (!(position % AUDIO_SAMPLES_NUMBER)) { - float maxValue = 0; - uint32_t index = 0; - - /* We have AUDIO_SAMPLES_NUMBER samples, we can run the FFT on them */ - uint16_t offset = position <= AUDIO_SAMPLES_NUMBER ? 
0 : AUDIO_SAMPLES_NUMBER; - if (offset != 0) - position = 0; - - //DMESG("Run FFT, %d", offset); - //auto a = system_timer_current_time(); - arm_rfft_fast_f32(&fft_instance, buf + offset, output, 0); - arm_cmplx_mag_f32(output, mag, AUDIO_SAMPLES_NUMBER / 2); - arm_max_f32(mag + 1, AUDIO_SAMPLES_NUMBER / 2 - 1, &maxValue, &index); - //auto b = system_timer_current_time(); - - //DMESG("Before FFT: %d", (int)a); - //DMESG("After FFT: %d (%d)", (int)b, (int)(b - a)); - - uint32_t freq = ((uint32_t)MIC_SAMPLE_RATE / AUDIO_SAMPLES_NUMBER) * (index + 1); - lastFreq = (int) freq; - // DMESG("Freq: %d (max: %d.%d, Index: %d)", - // freq, - // (int)maxValue, - // ((int)(maxValue * 100) % 100), - // index); + position = 0; + + uint16_t from = frequencyToIndex(RECOGNITION_START_FREQ); + uint16_t to = min(frequencyToIndex(RECOGNITION_END_FREQ), AUDIO_SAMPLES_NUMBER / 2); + uint16_t size = to - from; + + uint16_t result_freq[size]; + uint8_t result_size = 0; + + #ifdef HANN_WINDOW + // arm_mult_f32 not found in arm_math.h? -- using an old version of arm_math.h + // arm_mult_f32(buf, hann_window, buf, AUDIO_SAMPLES_NUMBER); + for (uint16_t i=0;i ANALYSIS_STD_THRESHOLD && mean > ANALYSIS_MEAN_THRESHOLD) { + result_freq[0] = 0; + for(uint16_t i=0; i < size; i++) + // for now just picking the maximum if satisfies the conditions + if(mag[i] > threshold && mag[i] > result_freq[0]) + result_freq[0] = indexToFrequency(i + from), result_size = 1; + + } + + sendAnalysis(result_freq, result_size); } } diff --git a/source/MicroBitSoundRecogniser.cpp b/source/MicroBitSoundRecogniser.cpp new file mode 100644 index 00000000..87533d7c --- /dev/null +++ b/source/MicroBitSoundRecogniser.cpp @@ -0,0 +1,212 @@ + +#include "MicroBitSoundRecogniser.h" + +MicroBitSoundRecogniser::MicroBitSoundRecogniser(MicroBitAudioProcessor& audio_processor, MicroBit& uBit) + : audio_proceesor(audio_processor), uBit(uBit) { + analysing = false; + audio_proceesor.connect(this); + buffer_len = 0; + sounds_size = 0; +} + +MicroBitSoundRecogniser::~MicroBitSoundRecogniser(){ + if(sounds_size != 0){ + for(uint8_t i = 0; i < sounds_size; i++) { + delete sounds[i]; + delete sounds_names[i]; + } + delete [] sounds; + delete [] sounds_names; + } +} + +int MicroBitSoundRecogniser::pullRequest(){ + + auto frames = audio_proceesor.pull(); + + if(!analysing) return DEVICE_OK; + + // I only send one at a time when calling pullRequest from MicroBitAudioProcessor. + // Is there a way to make them concurrent -- might save some resources? 
+ MicroBitAudioProcessor::AudioFrameAnalysis* buf = (MicroBitAudioProcessor::AudioFrameAnalysis* ) &frames[0]; + buffer[buffer_len].size = buf[0].size; + for(uint8_t i = 0; i update(buffer, buffer_len); + if(sounds[sound_it] -> matched()){ + if(callback != NULL) + callback(*sounds_names[sound_it]); + return DEVICE_OK; + } + } + + return DEVICE_OK; +} + +MicroBitAudioProcessor* MicroBitSoundRecogniser::getAudioProcessor(){ + return &audio_proceesor; +} + +void MicroBitSoundRecogniser::setCallback(void (*_callback)(ManagedString)){ + callback = _callback; +} + +void MicroBitSoundRecogniser::startAnalisying(void (*_callback)(ManagedString)){ + setCallback(_callback); + analysing = true; + audio_proceesor.startRecording(); +} + +void MicroBitSoundRecogniser::stopAnalisying(){ + analysing = false; + buffer_len = 0; + audio_proceesor.stopRecording(uBit); + for(uint8_t sound_it; sound_it < sounds_size; sound_it ++) + sounds[sound_it] -> resetHistory(); +} + +MicroBitSoundRecogniser::SoundSample::SoundSample(const uint16_t* _frames, uint8_t size) + : size(size) { + frames = new uint16_t[size]; + memcpy(frames, _frames, sizeof(uint16_t) * size); +} + +MicroBitSoundRecogniser::SoundSample::~SoundSample() { + delete[] frames; +} + +MicroBitSoundRecogniser::SoundSequence::SoundSequence( uint8_t size, + uint32_t threshold, + uint8_t deviation) + : size(size), threshold(threshold), + deviation(deviation) { + samples = new SoundSample* [size]; +} + +MicroBitSoundRecogniser::SoundSequence::~SoundSequence() { + for(uint8_t i = 0 ; i < size; i++) + delete samples[i]; + delete [] samples; +} + +MicroBitSoundRecogniser::Sound::Sound(uint8_t size, uint8_t max_zeros, uint8_t max_history_len) + : size(size), max_zeros(max_zeros), history_len(0), + max_history_len(max_history_len){ + sequences = new SoundSequence* [size]; + history = new uint8_t[2 * max_history_len * size]; +} + +MicroBitSoundRecogniser::Sound::~Sound() { + for(uint8_t i = 0 ; i < size; i++) + delete sequences[i]; + delete [] sequences; + delete [] history; +} + +void MicroBitSoundRecogniser::Sound::update(MicroBitAudioProcessor::AudioFrameAnalysis* buffer, + uint8_t buffer_len){ + for(uint8_t seq_it = 0; seq_it < size; seq_it ++) + addToHistory(seq_it, matchSequence(seq_it, buffer, buffer_len)); + endHistoryFrame(); +} + +bool MicroBitSoundRecogniser::Sound::matched() { + if(getZeros(1, size - 1) <= max_zeros){ + history_len = 0; + return true; + } + return false; +} + +uint8_t MicroBitSoundRecogniser::Sound::matchSequence(uint8_t seq_id, + MicroBitAudioProcessor::AudioFrameAnalysis* buffer, + uint8_t buffer_len) const { + SoundSequence* seq = sequences[seq_id]; + uint8_t min_zeros = 255; + for(uint8_t sample_it = 0; sample_it < seq -> size; sample_it ++) { + uint8_t sample_len = seq -> samples[sample_it] -> size; + if(buffer_len < sample_len) continue; + + uint8_t zeros = 255; + if(seq_id == 0) zeros = 0; + else if (seq_id && zeros > getZeros(sample_len, seq_id - 1)) + zeros = getZeros(sample_len, seq_id - 1); + else if (seq_id && zeros > getZeros(sample_len, seq_id - 1)) + zeros = getZeros(sample_len, seq_id - 1); + + if(zeros > max_zeros) continue; + + uint32_t dist = 0; + uint32_t diff = 0; + uint8_t nr_of_diffs = 0; + uint8_t deviations_left = seq->deviation; + + for(uint8_t i = 0; i < sample_len; i++ ){ + if(seq -> samples[sample_it] -> frames[i] == 0) continue; + if(buffer[buffer_len - sample_len + i ].size == 0){ + zeros ++; + continue; + } + + uint16_t freq = seq -> samples[sample_it] -> frames[i]; + + nr_of_diffs ++; + diff = 10000; + 
for(uint8_t j = 0; j < buffer[buffer_len - sample_len + i].size; j++){ + diff = min(diff, max(0, abs(freq - buffer[buffer_len - sample_len + i].buf[j]) - 100 )); + } + + if(deviations_left && diff*diff > nr_of_diffs * seq -> threshold){ + deviations_left --; + nr_of_diffs --; + } + else + dist += diff * diff; + } + + if(dist <= nr_of_diffs * seq -> threshold && min_zeros > zeros) { + min_zeros = zeros; + } + } + + return min_zeros; +} + + +uint8_t MicroBitSoundRecogniser::Sound::getZeros(uint8_t frames_ago, uint8_t seq_id) const { + if(history_len < frames_ago) return 255; + return history[(history_len - frames_ago) * size + seq_id]; +} + +void MicroBitSoundRecogniser::Sound::addToHistory(uint8_t seq_id, uint8_t value){ + history[history_len * size + seq_id] = value; +} + +void MicroBitSoundRecogniser::Sound::endHistoryFrame(){ + history_len ++; + // same type of buffer as the buffer from MicroBitSoundRecogniser + if(history_len == 2 * max_history_len) { + memcpy(&history[0], &history[max_history_len * size], sizeof(uint8_t) * max_history_len * size); + history_len = max_history_len; + } +} + +void MicroBitSoundRecogniser::Sound::resetHistory(){ + history_len = 0; +} + + + From 75234cb130c50170be1c663236ebb04d471d0836 Mon Sep 17 00:00:00 2001 From: Vlad Turcuman Date: Fri, 30 Apr 2021 17:50:15 +0100 Subject: [PATCH 2/5] Add Morse code classes and bug fixed audio processor and sound recogniser --- inc/EmojiRecogniser.h | 6 +- inc/MicroBitAudioProcessor.h | 42 ++-- inc/MicroBitMorseCodeRecogniser.h | 52 ++++ inc/MicroBitMorseCommunicator.h | 53 ++++ inc/MicroBitMorseMessage.h | 71 ++++++ inc/MicroBitSoundRecogniser.h | 16 +- source/EmojiRecogniser.cpp | 320 +++++++++++++++++++++---- source/MicroBitAudioProcessor.cpp | 158 ++++++------ source/MicroBitMorseCodeRecogniser.cpp | 174 ++++++++++++++ source/MicroBitMorseCommunicator.cpp | 119 +++++++++ source/MicroBitMorseMessage.cpp | 144 +++++++++++ source/MicroBitSoundRecogniser.cpp | 85 ++++--- 12 files changed, 1032 insertions(+), 208 deletions(-) create mode 100644 inc/MicroBitMorseCodeRecogniser.h create mode 100644 inc/MicroBitMorseCommunicator.h create mode 100644 inc/MicroBitMorseMessage.h create mode 100644 source/MicroBitMorseCodeRecogniser.cpp create mode 100644 source/MicroBitMorseCommunicator.cpp create mode 100644 source/MicroBitMorseMessage.cpp diff --git a/inc/EmojiRecogniser.h b/inc/EmojiRecogniser.h index 36a227a9..51a91c81 100644 --- a/inc/EmojiRecogniser.h +++ b/inc/EmojiRecogniser.h @@ -6,7 +6,11 @@ class EmojiRecogniser : public MicroBitSoundRecogniser { - void addHappySound(); + void addHappySound( MicroBit& ubit); + void addHelloSound( MicroBit& ubit); + void addSadSound( MicroBit& ubit); + void addSoaringSound( MicroBit& ubit); + void addTwinkleSound( MicroBit& ubit); public: EmojiRecogniser(MicroBitAudioProcessor& processor, diff --git a/inc/MicroBitAudioProcessor.h b/inc/MicroBitAudioProcessor.h index 53bebbf8..d8a297fb 100644 --- a/inc/MicroBitAudioProcessor.h +++ b/inc/MicroBitAudioProcessor.h @@ -27,20 +27,19 @@ DEALINGS IN THE SOFTWARE. 
#define MICROBIT_AUDIO_PROCESSOR_H #define MIC_SAMPLE_RATE (1000000 / MIC_SAMPLE_DELTA) -#define AUDIO_SAMPLES_NUMBER 512 -#define HISTORY_LENGTH 50 +#define DEFAULT_AUDIO_SAMPLES_NUMBER 512 +#define EMOJI_AUDIO_SAMPLES_NUMBER 512 +#define MORSE_AUDIO_SAMPLES_NUMBER 128 -#define RECOGNITION_START_FREQ 1400 -#define RECOGNITION_END_FREQ 4500 +#define RECOGNITION_START_FREQ 1700 +#define RECOGNITION_END_FREQ 5000 #define ANALYSIS_STD_MULT_THRESHOLD 3 -#define ANALYSIS_STD_THRESHOLD 75 +#define ANALYSIS_STD_THRESHOLD 60 #define ANALYSIS_MEAN_THRESHOLD 0 -// #define SQUARE_BEFORE_ANALYSIS -#define HARMONIC_PRODUCT_SPECTRUM -#define HANN_WINDOW - +#define MAXIMUM_NUMBER_OF_FREQUENCIES 3 +#define SIMILAR_FREQ_THRESHOLD 100 class MicroBitAudioProcessor : public DataSink, public DataSource { @@ -48,45 +47,38 @@ class MicroBitAudioProcessor : public DataSink, public DataSource struct AudioFrameAnalysis { uint8_t size; - uint16_t buf[3]; + uint16_t buf[MAXIMUM_NUMBER_OF_FREQUENCIES]; }; private: + + MicroBit& uBit; DataSource &audiostream; DataSink *recogniser; int zeroOffset; // unsigned value that is the best effort guess of the zero point of the data source int divisor; // Used for LINEAR modes + uint16_t audio_samples_number; arm_rfft_fast_instance_f32 fft_instance; float *buf; - float *output; + float *fft_output; float *mag; + uint16_t position; bool recording; -#ifdef HANN_WINDOW - float32_t hann_window[AUDIO_SAMPLES_NUMBER]; -#endif - - AudioFrameAnalysis out_buffer[HISTORY_LENGTH * 2]; - uint16_t out_buffer_len; - bool consumed_buffer; - - // What is this used for? Couldn't find any references to it - float rec[AUDIO_SAMPLES_NUMBER * 2]; - int lastFreq; + AudioFrameAnalysis output; uint16_t frequencyToIndex(int freq); float32_t indexToFrequency(int index); - void sendAnalysis(uint16_t* freq, uint8_t size); public: - MicroBitAudioProcessor(DataSource& source); + MicroBitAudioProcessor(DataSource& source, MicroBit& uBit, uint16_t audio_samples_number = DEFAULT_AUDIO_SAMPLES_NUMBER); ~MicroBitAudioProcessor(); virtual int pullRequest(); void connect(DataSink *downstream); virtual ManagedBuffer pull(); - int getFrequency(); + int setDivisor(int d); void startRecording(); void stopRecording(MicroBit& uBit); diff --git a/inc/MicroBitMorseCodeRecogniser.h b/inc/MicroBitMorseCodeRecogniser.h new file mode 100644 index 00000000..cebf753a --- /dev/null +++ b/inc/MicroBitMorseCodeRecogniser.h @@ -0,0 +1,52 @@ + +#ifndef MICROBIT_MORSE_CODE_RECOGNISER_H +#define MICROBIT_MORSE_CODE_RECOGNISER_H + +#include "DataStream.h" +#include "MicroBitAudioProcessor.h" +#include "arm_math.h" + +#define DETECTION_THRESHOLD 150 +#define MORSE_FRAME_TRUE_RATE_THRESHOLD 0.8 +#define MAX_TIME_UNIT 500 + +class MicroBitMorseCodeRecogniser : public DataSink +{ + MicroBitAudioProcessor& audio_proceesor; + MicroBit& uBit; + + uint16_t timeUnit; + uint16_t frequency; + + bool analysing; + bool syncronised; + unsigned int pauses; + + bool buffer[2 * MAX_TIME_UNIT]; + bool normalised_buffer[6]; + uint16_t buffer_len; + uint16_t normalised_buffer_len; + + void (*callback)(ManagedString) = NULL; + + void processFrame(MicroBitAudioProcessor::AudioFrameAnalysis* frame); + + bool recogniseLastMorseFrame(uint16_t to, uint16_t threshold ); + + public: + MicroBitMorseCodeRecogniser(MicroBitAudioProcessor& processor, MicroBit& uBit, uint16_t freq, uint16_t timeUnit) ; + + ~MicroBitMorseCodeRecogniser(); + + virtual int pullRequest(); + + MicroBitAudioProcessor* getAudioProcessor(); + + void setCallback (void 
(*_callback)(ManagedString)); + void startAnalisying(void (*_callback)(ManagedString)); + void stopAnalisying(); + +}; + + +#endif \ No newline at end of file diff --git a/inc/MicroBitMorseCommunicator.h b/inc/MicroBitMorseCommunicator.h new file mode 100644 index 00000000..7a8555ce --- /dev/null +++ b/inc/MicroBitMorseCommunicator.h @@ -0,0 +1,53 @@ +/* +The MIT License (MIT) +Copyright (c) 2020 Arm Limited. +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + +#include "MicroBit.h" + +#define DEFAULT_DURATION 500 +#define DEFAULT_FREQUENCY 2000 +#define DEFAULT_RANDOMNESS 0 + +class MicroBitMorseCommunicator +{ + private: + + int duration; + int frequency; + int randomness; + MicroBit* uBit; + + ManagedString dotFrame; + ManagedString dashFrame; + + void createFrames(); + void play(Symbol s); + + + public: + MicroBitMorseCommunicator(MicroBit* bit); + ~MicroBitMorseCommunicator(); + void send(MicroBitMorseMessage* mess); + + void set_duration(int d); + void set_frequency(int f); // this could be replaced by set_channel eventually + void set_randomness(int r); // this will probably be removed once we decide how much we want + + +}; \ No newline at end of file diff --git a/inc/MicroBitMorseMessage.h b/inc/MicroBitMorseMessage.h new file mode 100644 index 00000000..2b0c486c --- /dev/null +++ b/inc/MicroBitMorseMessage.h @@ -0,0 +1,71 @@ +/* +The MIT License (MIT) +Copyright (c) 2020 Arm Limited. +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+*/ + +#include "MicroBit.h" +#include "ManagedString.h" +#include + +/* +International Morse code is composed of five elements: + + short mark, dot or "dit": "dot duration" is one time unit long + longer mark, dash or "dah": three time units long + inter-element gap between the dots and dashes within a character: one unit long + short gap (between letters): three time units long + medium gap (between words): seven time units long + +*/ + +enum Symbol {Dot, Dash, Unit_Gap, Letter_Gap, Word_Gap}; + +class Node{ +public: + Symbol data; + Node* next; +}; + +struct cmpString { + bool operator()(ManagedString a, ManagedString b) const { + return (a < b); + } +}; + + +class MicroBitMorseMessage { + + private: + Node* head; + Node* end; + Node* curr; + MicroBit* uBit; + + static std::map toStr; + std::map toChar; + + public: + MicroBitMorseMessage(MicroBit* bit); + ~MicroBitMorseMessage(); + bool isEmpty(); + void push(Symbol s); + void push(char c); + void push(const char* c); + Symbol getSymbol(); + void get(char* out); +}; diff --git a/inc/MicroBitSoundRecogniser.h b/inc/MicroBitSoundRecogniser.h index 3be3cc6b..4d6d1bde 100644 --- a/inc/MicroBitSoundRecogniser.h +++ b/inc/MicroBitSoundRecogniser.h @@ -44,10 +44,14 @@ class MicroBitSoundRecogniser : public DataSink }; struct Sound { - Sound(uint8_t size, uint8_t max_zeros, uint8_t max_history_len); + Sound(uint8_t size, uint8_t max_deviation, uint8_t max_history_len, bool consider_all_frequencies, MicroBit& ubit); ~Sound(); - - uint8_t max_zeros; + + // debuging only + MicroBit& ubit; + + bool consider_all_frequencies; + uint8_t max_deviation; uint8_t size; SoundSequence** sequences; @@ -59,10 +63,10 @@ class MicroBitSoundRecogniser : public DataSink private: uint8_t matchSequence( uint8_t seq_id, - MicroBitAudioProcessor::AudioFrameAnalysis* buffer, - uint8_t buffer_len) const; + MicroBitAudioProcessor::AudioFrameAnalysis* buffer, + uint8_t buffer_len) const; - uint8_t getZeros(uint8_t frames_ago, uint8_t seq_id) const; + uint8_t getDeviation(uint8_t frames_ago, uint8_t seq_id) const; void addToHistory(uint8_t seq_id, uint8_t value); void endHistoryFrame(); diff --git a/source/EmojiRecogniser.cpp b/source/EmojiRecogniser.cpp index 5cb4af1a..a9f7b8c6 100644 --- a/source/EmojiRecogniser.cpp +++ b/source/EmojiRecogniser.cpp @@ -1,72 +1,286 @@ #include "EmojiRecogniser.h" - EmojiRecogniser::EmojiRecogniser( MicroBitAudioProcessor& processor, MicroBit& uBit) : MicroBitSoundRecogniser(processor, uBit){ - sounds = new Sound* [7]; - sounds_names = new ManagedString* [7]; - addHappySound(); + sounds = new Sound* [5]; + sounds_names = new ManagedString* [5]; + addHappySound(uBit); + addHelloSound(uBit); + addSadSound(uBit); + addSoaringSound(uBit); + addTwinkleSound(uBit); } -void EmojiRecogniser::addHappySound() { - - uint16_t happy[3][6][8] = { - // First sequence - { - { 4, 4368, 0, 0, 2751}, - { 5, 4431, 0, 0, 0, 2751}, - { 6, 4473, 0, 0, 0, 0, 2751}, - { 5, 4263, 0, 0, 0, 2751}, - { 6, 4263, 0, 0, 0, 0, 2751}, - { 4, 4263, 0, 0, 2751} - }, - // Second sequence - { - { 5, 3024, 3024, 3276, 3276, 3255}, - { 6, 3024, 3024, 3717, 3276, 3276, 3255}, - { 5, 2751, 3003, 3276, 3276, 3276} - }, - // Third sequence - { - { 6, 3423, 3276, 3276, 3024, 3024, 3024}, - { 6, 3423, 3276, 3276, 3255, 3024, 3024}, - { 6, 3423, 3276, 3276, 3024, 3024, 2961}, - { 6, 3276, 3234, 3234, 3276, 3003, 3003} - } - }; - - uint16_t happy_thresholds[3] = { - 50 * 50, - 50 * 100, - 50 * 100 - }; - - uint8_t happy_deviations[3] = { - 0, - 1, - 1 - }; - - uint8_t happy_samples[3] 
= { - 6, - 3, - 4 - }; + +// HAPPY Sound ---- + +const uint8_t happy_sequences = 2; +const uint8_t happy_max_deviations = 3; + +uint16_t happy_samples[happy_sequences][3][11] = { + // First sequence + { + { 3, 2037, 2289, 2289}, + { 3, 2037, 2310, 2541}, + { 3, 2037, 2289, 2310} + }, + { + { 8, 2562, 2562, 2562, 2541, 2730, 2562, 2289, 2289}, + { 10, 2562, 2562, 2562, 2562, 2730, 2562, 2541, 2310, 2310, 2289}, + { 10, 0, 2562, 2562, 2541, 2730, 2541, 2562, 2310, 2289, 2289} + + } +}; + +uint16_t happy_thresholds[happy_sequences] = { + 50, + 50 +}; + +uint8_t happy_deviations[happy_sequences] = { + 2, + 3 +}; + +uint8_t happy_nr_samples[happy_sequences] = { + 3, + 3 +}; + + +void EmojiRecogniser::addHappySound( MicroBit& ubit) { uint8_t it = sounds_size; sounds_size ++; sounds_names[it] = new ManagedString("happy"); - sounds[it] = new Sound(2, 3, 10); + sounds[it] = new Sound(happy_sequences, happy_max_deviations, 14, true, ubit); + + for(uint8_t i = 0; i < happy_sequences; i++){ + sounds[it] -> sequences[i] = new SoundSequence(happy_nr_samples[i], happy_thresholds[i], happy_deviations[i]); + for(uint8_t j = 0; j < happy_nr_samples[i]; j ++) + sounds[it] -> sequences[i] -> samples[j] = new SoundSample(happy_samples[i][j] + 1, happy_samples[i][j][0]); + } + +} + +// HELLO Sound ---- + + +const uint8_t hello_sequences = 2; +const uint8_t hello_max_deviations = 3; + +uint16_t hello_samples[hello_sequences][3][10] = { + // First sequence + { + { 3, 2688, 2919, 3528}, + { 3, 2751, 2919, 3528}, + { 3, 2688, 3402, 3528} + }, + { + { 9, 3507, 3150, 3087, 3003, 2961, 2940, 2940, 2961, 2940}, + { 9, 3339, 3108, 3024, 2982, 2940, 2940, 2961, 2961, 2940}, + { 9, 3381, 3150, 3087, 3003, 2961, 2940, 2940, 2961, 2961} + } +}; + +uint16_t hello_thresholds[hello_sequences] = { + 50, + 80 +}; + +uint8_t hello_deviations[hello_sequences] = { + 2, + 3 +}; + +uint8_t hello_nr_samples[hello_sequences] = { + 3, + 3 +}; + + +void EmojiRecogniser::addHelloSound( MicroBit& ubit) { + uint8_t it = sounds_size; + sounds_size ++; + sounds_names[it] = new ManagedString("hello"); + + sounds[it] = new Sound(hello_sequences, hello_max_deviations, 12, true, ubit); + + for(uint8_t i = 0; i < hello_sequences; i++){ + sounds[it] -> sequences[i] = new SoundSequence(hello_nr_samples[i], hello_thresholds[i], hello_deviations[i]); + for(uint8_t j = 0; j < hello_nr_samples[i]; j ++) + sounds[it] -> sequences[i] -> samples[j] = new SoundSample(hello_samples[i][j] + 1, hello_samples[i][j][0]); + } +} + + +// SAD Sound ---- + +const uint8_t sad_sequences = 2; +const uint8_t sad_max_deviations = 6; + +uint16_t sad_samples[sad_sequences][3][16] = { + // First sequence + { + { 8, 3423, 3339, 3255, 3087, 2961, 2856, 2709, 2604}, + { 8, 3381, 3318, 3192, 3045, 2898, 2793, 2625, 2520}, + { 8, 3318, 3255, 3087, 2919, 2793, 2688, 2562, 2436} + }, + { + { 15, 3591, 3423, 3318, 3171, 3024, 2940, 2877, 2835, 2814, 2814, 2835, 2898, 2940, 3045, 2898}, + { 15, 3507, 3423, 3213, 3087, 3003, 2919, 2856, 2814, 2814, 2814, 2856, 2919, 3003, 3150, 2898}, + { 14, 3402, 3234, 3108, 3045, 2940, 2856, 2814, 2814, 2814, 2856, 2898, 2982, 3066, 2898} + } +}; + +uint16_t sad_thresholds[sad_sequences] = { + 50, + 100 +}; + +uint8_t sad_deviations[sad_sequences] = { + 4, + 6 +}; + +uint8_t sad_nr_samples[sad_sequences] = { + 3, + 3 +}; + + +void EmojiRecogniser::addSadSound( MicroBit& ubit) { + uint8_t it = sounds_size; + sounds_size ++; + sounds_names[it] = new ManagedString("sad"); + + sounds[it] = new Sound(sad_sequences, sad_max_deviations, 18, true, ubit); 
+ + for(uint8_t i = 0; i < sad_sequences; i++){ + sounds[it] -> sequences[i] = new SoundSequence(sad_nr_samples[i], sad_thresholds[i], sad_deviations[i]); + for(uint8_t j = 0; j < sad_nr_samples[i]; j ++) + sounds[it] -> sequences[i] -> samples[j] = new SoundSample(sad_samples[i][j] + 1, sad_samples[i][j][0]); + } +} + + +// SOARING Sound ---- + +const uint8_t soaring_sequences = 4; +const uint8_t soaring_max_deviations = 15; - for(uint8_t i = 1; i < 3 ; i++){ - sounds[it] -> sequences[i -1] = new SoundSequence(happy_samples[i], happy_thresholds[i], happy_deviations[i]); - for(uint8_t j = 0; j < happy_samples[i]; j ++) - sounds[it] -> sequences[i-1] -> samples[j] = new SoundSample(happy[i][j] + 1, happy[i][j][0]); +uint16_t soaring_samples[soaring_sequences][3][10] = { + // First sequence + { + { 9, 2499, 2499, 2814, 2688, 2646, 2646, 2898, 2898, 3759}, + { 9, 2520, 2520, 2835, 2667, 2667, 2898, 2898, 2898, 3759}, + { 9, 2499, 2499, 2814, 2814, 2646, 2646, 2898, 2898, 3759} + }, + { + { 7, 3759, 3003, 2646, 2688, 2667, 2667, 4599}, + { 7, 3003, 3003, 2646, 2646, 2667, 2667, 4599}, + { 7, 3759, 3003, 3003, 2646, 2646, 2667, 4599} + }, + { + { 4, 3528, 2625, 2625, 3507}, + { 4, 4599, 2625, 2625, 3507}, + { 4, 4599, 2667, 3528, 3507} + }, + { + { 7, 3528, 3927, 2499, 2499, 2499, 2646, 2646} } +}; + +const uint16_t soaring_thresholds[soaring_sequences] = { + 150, + 100, + 100, + 100 +}; + +const uint8_t soaring_deviations[soaring_sequences] = { + 5, + 5, + 5, + 5 +}; + +const uint8_t soaring_nr_samples[soaring_sequences] = { + 3, + 3, + 3, + 1 +}; + +void EmojiRecogniser::addSoaringSound( MicroBit& ubit) { + uint8_t it = sounds_size; + sounds_size ++; + sounds_names[it] = new ManagedString("soaring"); + + sounds[it] = new Sound(soaring_sequences, soaring_max_deviations, 15, true, ubit); + + for(uint8_t i = 0; i < soaring_sequences; i++){ + sounds[it] -> sequences[i] = new SoundSequence(soaring_nr_samples[i], soaring_thresholds[i], soaring_deviations[i]); + for(uint8_t j = 0; j < soaring_nr_samples[i]; j ++) + sounds[it] -> sequences[i] -> samples[j] = new SoundSample(soaring_samples[i][j] + 1, soaring_samples[i][j][0]); + } } +// TWINKLE Sound ---- + +const uint8_t twinkle_sequences = 3; +const uint8_t twinkle_max_deviations = 4; + +uint16_t twinkle_samples[twinkle_sequences][2][7] = { + // First sequence + { + { 4, 2163, 1953, 2604, 2604}, + { 4, 2163, 2163, 2604, 2604} + }, + { + { 5, 2436, 2163, 2604, 2310, 2709}, + { 5, 2436, 2163, 2604, 2604, 2709} + }, + { + { 6, 0, 2604, 2604, 2436, 2520, 2604}, + { 6, 0, 2604, 2604, 2436, 2898, 2604} + } +}; + +const uint16_t twinkle_thresholds[twinkle_sequences] = { + 80, + 80, + 80 +}; + +const uint8_t twinkle_deviations[twinkle_sequences] = { + 2, + 3, + 3 +}; + +const uint8_t twinkle_nr_samples[twinkle_sequences] = { + 2, + 2, + 2 +}; + + +void EmojiRecogniser::addTwinkleSound( MicroBit& ubit) { + uint8_t it = sounds_size; + sounds_size ++; + sounds_names[it] = new ManagedString("twinkle"); + + sounds[it] = new Sound(twinkle_sequences, twinkle_max_deviations, 15, true, ubit); + + for(uint8_t i = 0; i < twinkle_sequences; i++){ + sounds[it] -> sequences[i] = new SoundSequence(twinkle_nr_samples[i], twinkle_thresholds[i], twinkle_deviations[i]); + for(uint8_t j = 0; j < twinkle_nr_samples[i]; j ++) + sounds[it] -> sequences[i] -> samples[j] = new SoundSample(twinkle_samples[i][j] + 1, twinkle_samples[i][j][0]); + } +} \ No newline at end of file diff --git a/source/MicroBitAudioProcessor.cpp b/source/MicroBitAudioProcessor.cpp index 
1b7302e9..80e639af 100644 --- a/source/MicroBitAudioProcessor.cpp +++ b/source/MicroBitAudioProcessor.cpp @@ -21,33 +21,33 @@ DEALINGS IN THE SOFTWARE. #include "MicroBit.h" #include "MicroBitAudioProcessor.h" +#include +#include -MicroBitAudioProcessor::MicroBitAudioProcessor(DataSource& source) - : audiostream(source), recogniser(NULL) { +MicroBitAudioProcessor::MicroBitAudioProcessor( DataSource& source, + MicroBit& uBit, + uint16_t audio_samples_number) + : audiostream(source), + recogniser(NULL), + uBit(uBit), + audio_samples_number(audio_samples_number) { divisor = 1; - lastFreq = 0; - arm_rfft_fast_init_f32(&fft_instance, AUDIO_SAMPLES_NUMBER); + + arm_rfft_fast_init_f32(&fft_instance, audio_samples_number); /* Double Buffering: We allocate twice the number of samples*/ - // the C++ way for doing this is (as far as I know) - // new float32_t[AUDIO_SAMPLES_NUMBER * 2]; - buf = (float *)malloc(sizeof(float) * AUDIO_SAMPLES_NUMBER * 2); - output = (float *)malloc(sizeof(float) * AUDIO_SAMPLES_NUMBER); - mag = (float *)malloc(sizeof(float) * AUDIO_SAMPLES_NUMBER / 2); - - memset(buf, 0, sizeof(buf)); + buf = new float[audio_samples_number * 2]; + fft_output = new float[audio_samples_number]; + mag = new float[audio_samples_number / 2]; -#ifdef HANN_WINDOW - for(int i=0; i < AUDIO_SAMPLES_NUMBER; i++) - hann_window[i] = 0.5 * (1 - arm_cos_f32(2 * 3.14159265 * i / AUDIO_SAMPLES_NUMBER)); -#endif + memset(buf, 0, sizeof(buf)); position = 0; recording = false; - if (buf == NULL || output == NULL || mag == NULL) { + if (buf == NULL || fft_output == NULL || mag == NULL || played == NULL) { DMESG("DEVICE_NO_RESOURCES"); target_panic(DEVICE_OOM); } @@ -57,53 +57,28 @@ MicroBitAudioProcessor::MicroBitAudioProcessor(DataSource& source) MicroBitAudioProcessor::~MicroBitAudioProcessor() { - free(buf); - free(output); - free(mag); + delete buf; + delete fft_output; + delete mag; } uint16_t MicroBitAudioProcessor::frequencyToIndex(int freq) { - return (freq / ((uint32_t)MIC_SAMPLE_RATE / AUDIO_SAMPLES_NUMBER)); + return (freq / ((uint32_t)MIC_SAMPLE_RATE / audio_samples_number)); } float32_t MicroBitAudioProcessor::indexToFrequency(int index) { - return ((uint32_t)MIC_SAMPLE_RATE / AUDIO_SAMPLES_NUMBER) * index; + return ((uint32_t)MIC_SAMPLE_RATE / audio_samples_number) * index; } -void MicroBitAudioProcessor::sendAnalysis(uint16_t* freq, uint8_t size) { - - if(consumed_buffer) - out_buffer_len = 0; - - if(out_buffer_len == 2 * HISTORY_LENGTH){ - memcpy(out_buffer, &out_buffer[HISTORY_LENGTH], (sizeof(AudioFrameAnalysis) * HISTORY_LENGTH)); - out_buffer_len = HISTORY_LENGTH; - } - - out_buffer[out_buffer_len].size = min(size, 3); - if(size > 2) - out_buffer[out_buffer_len].buf[2] = freq[2]; - if(size > 1) - out_buffer[out_buffer_len].buf[1] = freq[1]; - if(size > 0) - out_buffer[out_buffer_len].buf[0] = freq[0]; - out_buffer_len ++; - - if(recogniser) - recogniser -> pullRequest(); -} - void MicroBitAudioProcessor::connect(DataSink *downstream){ recogniser = downstream; } ManagedBuffer MicroBitAudioProcessor::pull() { - consumed_buffer = true; - return ManagedBuffer((uint8_t*) out_buffer, sizeof(AudioFrameAnalysis) * out_buffer_len); + return ManagedBuffer(((uint8_t *) (&output)), (int) sizeof(AudioFrameAnalysis)); } - int MicroBitAudioProcessor::pullRequest() { @@ -115,8 +90,6 @@ int MicroBitAudioProcessor::pullRequest() if (!recording) return DEVICE_OK; - // Should be int8_t as this is the type the microphone - // records it int8_t *data = (int8_t *) &mic_samples[0]; int samples = 
mic_samples.length(); @@ -132,69 +105,72 @@ int MicroBitAudioProcessor::pullRequest() buf[position++] = (float)result; - if (!(position % AUDIO_SAMPLES_NUMBER)) + if (!(position % audio_samples_number)) { position = 0; uint16_t from = frequencyToIndex(RECOGNITION_START_FREQ); - uint16_t to = min(frequencyToIndex(RECOGNITION_END_FREQ), AUDIO_SAMPLES_NUMBER / 2); - uint16_t size = to - from; - - uint16_t result_freq[size]; - uint8_t result_size = 0; - - #ifdef HANN_WINDOW - // arm_mult_f32 not found in arm_math.h? -- using an old version of arm_math.h - // arm_mult_f32(buf, hann_window, buf, AUDIO_SAMPLES_NUMBER); - for (uint16_t i=0;i ANALYSIS_STD_THRESHOLD && mean > ANALYSIS_MEAN_THRESHOLD) { - result_freq[0] = 0; - for(uint16_t i=0; i < size; i++) - // for now just picking the maximum if satisfies the conditions - if(mag[i] > threshold && mag[i] > result_freq[0]) - result_freq[0] = indexToFrequency(i + from), result_size = 1; + std::vector> freq_played; + + for(uint16_t i=from; i < to; i++) + if(mag[i] > threshold) + freq_played.push_back(std::make_pair(indexToFrequency(i), mag[i])); + + sort(freq_played.begin(), freq_played.end(), + [&](std::pair a, + std::pair b) { + return a.second > b.second; + }); + for(uint16_t i = 0; i < freq_played.size(); i++) { + if(output.size == 0) { + output.buf[output.size ++] = freq_played[i].first; + continue; + } + + bool similar_found = false; + for (uint16_t j = 0; j < output.size; j ++) + if(abs(output.buf[j] - freq_played[i].first) <= SIMILAR_FREQ_THRESHOLD) + similar_found = true; + + if(!similar_found) { + output.buf[output.size ++] = freq_played[i].first; + if(output.size >= MAXIMUM_NUMBER_OF_FREQUENCIES) + break; + } + } } - - sendAnalysis(result_freq, result_size); + + if(recogniser) + recogniser -> pullRequest(); } } return DEVICE_OK; } -int MicroBitAudioProcessor::getFrequency(){ - return lastFreq; -} - int MicroBitAudioProcessor::setDivisor(int d) { diff --git a/source/MicroBitMorseCodeRecogniser.cpp b/source/MicroBitMorseCodeRecogniser.cpp new file mode 100644 index 00000000..9eb92fda --- /dev/null +++ b/source/MicroBitMorseCodeRecogniser.cpp @@ -0,0 +1,174 @@ +#include "MicroBitMorseCodeRecogniser.h" + + +MicroBitMorseCodeRecogniser::MicroBitMorseCodeRecogniser(MicroBitAudioProcessor& audio_processor, MicroBit& uBit, uint16_t freq, uint16_t _timeUnit) + : audio_proceesor(audio_processor), uBit(uBit), frequency(freq) { + analysing = false; + audio_proceesor.connect(this); + buffer_len = 0; + + timeUnit = 1.0 * MIC_SAMPLE_RATE * _timeUnit / 1000 / MORSE_AUDIO_SAMPLES_NUMBER + 0.5; + + memset(buffer, 0, sizeof(buffer)); + memset(normalised_buffer, 0, sizeof(normalised_buffer)); + + syncronised = false; + pauses = 0; + + uBit.serial.send("time unit: ") ; + uBit.serial.send(ManagedString((int) timeUnit)) ; + uBit.serial.send("\n") ; +} + +MicroBitMorseCodeRecogniser::~MicroBitMorseCodeRecogniser(){ +} + +MicroBitAudioProcessor* MicroBitMorseCodeRecogniser::getAudioProcessor(){ + return &audio_proceesor; +} + +void MicroBitMorseCodeRecogniser::setCallback(void (*_callback)(ManagedString)){ + callback = _callback; +} + +void MicroBitMorseCodeRecogniser::startAnalisying(void (*_callback)(ManagedString)){ + setCallback(_callback); + analysing = true; + audio_proceesor.startRecording(); +} + +void MicroBitMorseCodeRecogniser::stopAnalisying(){ + analysing = false; + buffer_len = 0; + audio_proceesor.stopRecording(uBit); +} + + +int MicroBitMorseCodeRecogniser::pullRequest() { + + auto frames = audio_proceesor.pull(); + + if(!analysing) return 
DEVICE_OK; + + MicroBitAudioProcessor::AudioFrameAnalysis* buf = (MicroBitAudioProcessor::AudioFrameAnalysis* ) &frames[0]; + uint16_t size = frames.length() / sizeof(MicroBitAudioProcessor::AudioFrameAnalysis); + + for(uint16_t i = 0; i < size; i++) + processFrame(&buf[i]); + + return DEVICE_OK; +} + + +bool MicroBitMorseCodeRecogniser::recogniseLastMorseFrame(uint16_t to, uint16_t threshold = 255) { + if(to < timeUnit) return false; + if(threshold == 255) threshold = timeUnit * MORSE_FRAME_TRUE_RATE_THRESHOLD; + + uint16_t nr_true = 0; + for(uint16_t it = to - timeUnit; it < to; it ++) + if(buffer[it]) nr_true ++; + + return nr_true >= threshold; +} + +void MicroBitMorseCodeRecogniser::processFrame(MicroBitAudioProcessor::AudioFrameAnalysis* frame) { + + // if (frame -> size >= 1) + // uBit.serial.send(ManagedString((int)frame -> buf[0])); + // uBit.serial.send("\n"); + + // uBit.serial.send("\n"); + + bool detected_freq = false; + for (uint16_t i = 0; i < frame -> size && !detected_freq; i++ ) + if(abs(frame -> buf[i] - frequency) < DETECTION_THRESHOLD) + detected_freq = true; + + bool added = false; + + buffer[buffer_len] = detected_freq; + buffer_len ++; + + + // uBit.serial.send(ManagedString((int)detected_freq) ); + + if(!syncronised && buffer_len > timeUnit && recogniseLastMorseFrame(buffer_len, timeUnit - 1) && + buffer[buffer_len - timeUnit]) { + normalised_buffer_len = 0; + buffer_len = 0; + syncronised = true; + + normalised_buffer[normalised_buffer_len] = true; + normalised_buffer_len ++; + + added = true; + + if(pauses <= 4) callback(ManagedString(" ")); + else callback(ManagedString(";")); + + pauses = 0; + + // if(normalised_buffer[normalised_buffer_len-1]) uBit.serial.send(" 1\n"); + // else uBit.serial.send(" .\n"); + } + + if(syncronised && buffer_len == timeUnit){ + normalised_buffer[normalised_buffer_len] = recogniseLastMorseFrame(buffer_len); + normalised_buffer_len ++; + buffer_len = 0; + + added = true; + + // if(normalised_buffer[normalised_buffer_len-1]) uBit.serial.send(" 1\n"); + // else uBit.serial.send(" .\n"); + } + + if(buffer_len == 2 * timeUnit) { + pauses ++; + if (syncronised) + normalised_buffer[normalised_buffer_len ++] = recogniseLastMorseFrame(buffer_len - timeUnit); + memcpy(buffer, &buffer[timeUnit], sizeof(bool) * timeUnit); + buffer_len = timeUnit; + + // uBit.serial.send("|"); + + added = true; + } + + + if(!syncronised || !added) return; + + // end of word + if(normalised_buffer_len == 6) { + callback(ManagedString(";")); + syncronised = false; + normalised_buffer_len = 0; + return; + } + + if (normalised_buffer_len == 1 && !normalised_buffer[0]) { + pauses ++; + normalised_buffer_len = 0; + syncronised = false; + } + + // Not enough data + if (normalised_buffer_len < 2) return; + + bool firstFrame = normalised_buffer[normalised_buffer_len - 2]; + bool secondFrame = normalised_buffer[normalised_buffer_len - 1]; + + // end of symbol + if(firstFrame && !secondFrame) { + // too short for dash => dot + if(normalised_buffer_len <= 2) + callback(ManagedString(".")); + else + callback(ManagedString("-")); + + pauses = 1; + normalised_buffer_len = 0; + return; + } +} + diff --git a/source/MicroBitMorseCommunicator.cpp b/source/MicroBitMorseCommunicator.cpp new file mode 100644 index 00000000..6e597d2a --- /dev/null +++ b/source/MicroBitMorseCommunicator.cpp @@ -0,0 +1,119 @@ +/* +The MIT License (MIT) +Copyright (c) 2020 Arm Limited. 
+Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + +#include "MicroBit.h" +#include "MicroBitMorseMessage.h" +#include "MicroBitMorseCommunicator.h" + + +MicroBitMorseCommunicator::MicroBitMorseCommunicator(MicroBit* bit){ + uBit = bit; + duration = DEFAULT_DURATION; + frequency = DEFAULT_FREQUENCY; + randomness = DEFAULT_RANDOMNESS; + + createFrames(); +} + +// turns n into a managed string of size 4 +ManagedString fourString(int n){ + if (n < 10) + return ManagedString("000") + ManagedString(n); + if (n < 100) + return ManagedString("00") + ManagedString(n); + if (n < 1000) + return ManagedString("0") + ManagedString(n); + if (n > 9999) + return ManagedString("9999"); + return ManagedString(n); +} + +void MicroBitMorseCommunicator::createFrames(){ + ManagedString freqString = fourString(frequency); + ManagedString dotDurationString = fourString(duration); + ManagedString dashDurationString = fourString(duration*3); + ManagedString randomString = fourString(randomness); + + + dotFrame = ManagedString("01023")+ + freqString+ + dotDurationString+ + ManagedString("02440")+ + freqString+ + ManagedString("0888102301280002000024")+ + randomString+ + ManagedString("000000000000000000000000"); + + dashFrame = ManagedString("01023")+ + freqString+ + dashDurationString+ + ManagedString("02440")+ + freqString+ + ManagedString("0888102301280002000024")+ + randomString+ + ManagedString("000000000000000000000000"); +} + + +MicroBitMorseCommunicator::~MicroBitMorseCommunicator(){ + return; +} + +void MicroBitMorseCommunicator::play(Symbol s){ + // uBit->serial.printf("%d\n", s); + switch(s){ + case Unit_Gap: + uBit->sleep(duration); + break; + case Letter_Gap: + uBit->sleep(duration * 3); + break; + case Word_Gap: + uBit->sleep(duration * 7); + break; + case Dot: + uBit->audio.soundExpressions.play(dotFrame); + break; + case Dash: + uBit->audio.soundExpressions.play(dashFrame); + break; + } +} + +void MicroBitMorseCommunicator::send(MicroBitMorseMessage* mess){ + while (!mess->isEmpty()){ + this->play(mess->getSymbol()); + } +} + +void MicroBitMorseCommunicator::set_duration(int d){ + duration = d; + createFrames(); +} + +void MicroBitMorseCommunicator::set_frequency(int f){ + frequency = f; + createFrames(); +} + +void MicroBitMorseCommunicator::set_randomness(int r){ + randomness = r; + createFrames(); +} \ No newline at end of file diff --git a/source/MicroBitMorseMessage.cpp b/source/MicroBitMorseMessage.cpp new file mode 100644 index 00000000..6e299dcf --- /dev/null +++ b/source/MicroBitMorseMessage.cpp @@ -0,0 +1,144 @@ +/* 
+The MIT License (MIT) +Copyright (c) 2020 Arm Limited. +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + +#include "MicroBit.h" +#include "MicroBitMorseMessage.h" + +std::map MicroBitMorseMessage::toStr = { + {'A', ".-"}, {'B', "-..."}, {'C', "-.-."}, {'D', "-.."}, {'E', "."}, {'F', "..-."}, {'G', "--."}, {'H', "...."}, {'I', ".."}, {'J', ".---"}, + {'K', "-.-"}, {'L', ".-.."}, {'M', "--"}, {'N', "-."}, {'O', "---"}, {'P', ".--."}, {'Q', "--.-"}, {'R', ".-."}, {'S', "..."}, {'T', "-"}, + {'U', "..-"}, {'V', "...-"}, {'W', ".--"}, {'X', "-..-"}, {'Y', "-.--"}, {'Z', "--.."}, {'1', ".----"}, {'2', "..---"}, {'3', "...--"}, + {'4', "....-"}, {'5', "....."}, {'6', "-...."}, {'7', "--..."}, {'8', "---.."}, {'9', "----."}, {'0', "-----"}}; + +MicroBitMorseMessage::MicroBitMorseMessage(MicroBit* bit) { + head = new Node(); + end = head; + curr = head; + + uBit = bit; + + for(std::map::iterator it = toStr.begin(); it != toStr.end(); ++it) { + toChar[it->second] = it->first; + } +} + +MicroBitMorseMessage::~MicroBitMorseMessage() { + Node* i = head; + Node* j = 0; + while (i != 0) { + j = i->next; + free(i); + i = j; + } +} + +Symbol MicroBitMorseMessage::getSymbol() { + curr = curr->next; + return curr->data; +} + +void MicroBitMorseMessage::get(char* out){ + int i = 0, si = 0; + char s[10]; + Symbol x; + + while (!isEmpty()){ + x = getSymbol(); + switch(x){ + case Dot: + s[si] = '.'; + si++; + break; + case Dash: + s[si] = '-'; + si++; + break; + case Unit_Gap: + break; + default: + if (si!=0){ + s[si] = 0; + si = 0; + out[i] = toChar[s]; + i++; + } + if (x == Word_Gap){ + out[i] = ' '; + i++; + } + } + } + if (si!=0){ + s[si] = 0; + si = 0; + out[i] = toChar[s]; + i++; + } + out[i] = 0; +} + +bool MicroBitMorseMessage::isEmpty() { + return (curr->next == 0); +} + +void MicroBitMorseMessage::push(Symbol s) { + Node* aux = new Node(); + aux->data = s; + end->next = aux; + end = aux; +} + +void MicroBitMorseMessage::push(char c) { + // capitalize letters + if (('a' <= c) && (c <= 'z')) c -= ('a' - 'A'); + + // turn unknown characters into E + if (c != ' ' && (c < 'A' || c > 'Z') && (c < '0' || c > '9')) c = 'E'; + + if (c == ' ') push(Word_Gap); + + // add space before every char except the first of every word + if ( head->next != 0 && (end->data == Dot || end->data == Dash)) push(Letter_Gap); + + ManagedString s = toStr[c]; + int i = 0; + while (s.charAt(i)!=0) { + // add space before every dot or dash except the first + if (i!=0) push(Unit_Gap); + + if (s.charAt(i) == '.') push(Dot); + else push(Dash); + i++; + } + i = 0; 
+ while (s.charAt(i)!=0) { + // uBit->serial.printf("%c", s.charAt(i)); + i++; + } + +} + +void MicroBitMorseMessage::push(const char* c) { + int i = 0; + while (c[i]!=0) { + push(c[i]); + i++; + } +} diff --git a/source/MicroBitSoundRecogniser.cpp b/source/MicroBitSoundRecogniser.cpp index 87533d7c..a3ca5f53 100644 --- a/source/MicroBitSoundRecogniser.cpp +++ b/source/MicroBitSoundRecogniser.cpp @@ -34,6 +34,12 @@ int MicroBitSoundRecogniser::pullRequest(){ buffer[buffer_len].buf[i] = buf[0].buf[i]; buffer_len ++; + + for(int i=0; i size; sample_it ++) { uint8_t sample_len = seq -> samples[sample_it] -> size; if(buffer_len < sample_len) continue; - uint8_t zeros = 255; - if(seq_id == 0) zeros = 0; - else if (seq_id && zeros > getZeros(sample_len, seq_id - 1)) - zeros = getZeros(sample_len, seq_id - 1); - else if (seq_id && zeros > getZeros(sample_len, seq_id - 1)) - zeros = getZeros(sample_len, seq_id - 1); + uint8_t deviation = 255; + if(seq_id == 0) deviation = 0; + else if (seq_id && deviation > getDeviation(sample_len, seq_id - 1)) + deviation = getDeviation(sample_len, seq_id - 1); + else if (seq_id && deviation > getDeviation(sample_len + 1, seq_id - 1)) + deviation = getDeviation(sample_len, seq_id - 1); - if(zeros > max_zeros) continue; + // ubit.serial.send(ManagedString("match init dev: ")); + // ubit.serial.send(ManagedString((int) deviation)); + // ubit.serial.send(ManagedString("\n ")); + + if(deviation > max_deviation || deviation >= min_dev) continue; - uint32_t dist = 0; uint32_t diff = 0; uint8_t nr_of_diffs = 0; uint8_t deviations_left = seq->deviation; - for(uint8_t i = 0; i < sample_len; i++ ){ + for(uint8_t i = 0; i < sample_len; i++) { if(seq -> samples[sample_it] -> frames[i] == 0) continue; - if(buffer[buffer_len - sample_len + i ].size == 0){ - zeros ++; + if(buffer[buffer_len - sample_len + i].size == 0) { + deviation ++; continue; } uint16_t freq = seq -> samples[sample_it] -> frames[i]; - nr_of_diffs ++; - diff = 10000; - for(uint8_t j = 0; j < buffer[buffer_len - sample_len + i].size; j++){ - diff = min(diff, max(0, abs(freq - buffer[buffer_len - sample_len + i].buf[j]) - 100 )); - } + diff = abs(freq - buffer[buffer_len - sample_len + i].buf[0]); - if(deviations_left && diff*diff > nr_of_diffs * seq -> threshold){ + if(consider_all_frequencies) + for(uint8_t j = 1; j < buffer[buffer_len - sample_len + i].size; j++) + diff = min(diff, abs(freq - buffer[buffer_len - sample_len + i].buf[j]) ); + + if(deviations_left && diff > seq -> threshold && deviation < max_deviation){ deviations_left --; - nr_of_diffs --; + deviation ++; + } + else if(diff > seq -> threshold ){ + deviation = 255; + break; } - else - dist += diff * diff; } - if(dist <= nr_of_diffs * seq -> threshold && min_zeros > zeros) { - min_zeros = zeros; - } + // ubit.serial.send(ManagedString("match end dev: ")); + // ubit.serial.send(ManagedString((int) deviation)); + // ubit.serial.send(ManagedString("\n ")); + + + if(deviation < min_dev && deviation <= max_deviation) + min_dev = deviation; + } - return min_zeros; + return min_dev; } -uint8_t MicroBitSoundRecogniser::Sound::getZeros(uint8_t frames_ago, uint8_t seq_id) const { +uint8_t MicroBitSoundRecogniser::Sound::getDeviation(uint8_t frames_ago, uint8_t seq_id) const { if(history_len < frames_ago) return 255; return history[(history_len - frames_ago) * size + seq_id]; } From 012e97593c3f7ad101edffdc76e17e2e3e7610bb Mon Sep 17 00:00:00 2001 From: Vlad Turcuman Date: Sat, 1 May 2021 16:01:20 +0100 Subject: [PATCH 3/5] Re-sampled the soaring and 
twinkle sounds and added comments to the audio processor --- inc/EmojiRecogniser.h | 13 +-- inc/MicroBitAudioProcessor.h | 118 +++++++++++++++---- inc/MicroBitSoundRecogniser.h | 8 +- source/EmojiRecogniser.cpp | 150 ++++++++++++++----------- source/MicroBitAudioProcessor.cpp | 97 ++++++++++++---- source/MicroBitMorseCodeRecogniser.cpp | 2 +- source/MicroBitSoundRecogniser.cpp | 28 +---- 7 files changed, 273 insertions(+), 143 deletions(-) diff --git a/inc/EmojiRecogniser.h b/inc/EmojiRecogniser.h index 51a91c81..62134217 100644 --- a/inc/EmojiRecogniser.h +++ b/inc/EmojiRecogniser.h @@ -6,15 +6,14 @@ class EmojiRecogniser : public MicroBitSoundRecogniser { - void addHappySound( MicroBit& ubit); - void addHelloSound( MicroBit& ubit); - void addSadSound( MicroBit& ubit); - void addSoaringSound( MicroBit& ubit); - void addTwinkleSound( MicroBit& ubit); + void addHappySound(); + void addHelloSound(); + void addSadSound(); + void addSoaringSound(); + void addTwinkleSound(); public: - EmojiRecogniser(MicroBitAudioProcessor& processor, - MicroBit& uBit); + EmojiRecogniser(MicroBitAudioProcessor& processor); }; #endif \ No newline at end of file diff --git a/inc/MicroBitAudioProcessor.h b/inc/MicroBitAudioProcessor.h index d8a297fb..e7f41b3f 100644 --- a/inc/MicroBitAudioProcessor.h +++ b/inc/MicroBitAudioProcessor.h @@ -26,16 +26,25 @@ DEALINGS IN THE SOFTWARE. #ifndef MICROBIT_AUDIO_PROCESSOR_H #define MICROBIT_AUDIO_PROCESSOR_H +/* + * Provides the fundamental frequencies in the microphone data. + * + * It takes in the microphone data (sampled at MIC_SAMPLE_RATE Hz + * which is ~11000 Hz now) and produces AudioFrameAnalysis data. + * +*/ + +// Default configuration values #define MIC_SAMPLE_RATE (1000000 / MIC_SAMPLE_DELTA) #define DEFAULT_AUDIO_SAMPLES_NUMBER 512 #define EMOJI_AUDIO_SAMPLES_NUMBER 512 #define MORSE_AUDIO_SAMPLES_NUMBER 128 #define RECOGNITION_START_FREQ 1700 -#define RECOGNITION_END_FREQ 5000 +#define RECOGNITION_END_FREQ 5400 #define ANALYSIS_STD_MULT_THRESHOLD 3 -#define ANALYSIS_STD_THRESHOLD 60 +#define ANALYSIS_STD_THRESHOLD 50 #define ANALYSIS_MEAN_THRESHOLD 0 #define MAXIMUM_NUMBER_OF_FREQUENCIES 3 @@ -45,6 +54,11 @@ class MicroBitAudioProcessor : public DataSink, public DataSource { public: + /* + * An AudioFrameAnalysis has the fundamental frequencies of a + * frame - maximum MAXIMUM_NUMBER_OF_FREQUENCIES and ordered + * from the most likely to the least. 
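 *
 * For example (illustrative values only): a frame in which a tone near
 * 2600 Hz dominates, with a weaker component around 3000 Hz, would be
 * reported roughly as
 *
 *     AudioFrameAnalysis frame;
 *     frame.size   = 2;
 *     frame.buf[0] = 2604;   // most likely fundamental, in Hz
 *     frame.buf[1] = 3003;   // second candidate, in Hz
 *
 * while a silent frame simply has size == 0. With the ~11000 Hz sample rate
 * and 512-sample frames, each FFT bucket spans roughly 21 Hz, which matches
 * the sampled frequencies used by the recognisers (all multiples of 21).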
+     */
     struct AudioFrameAnalysis {
         uint8_t size;
         uint16_t buf[MAXIMUM_NUMBER_OF_FREQUENCIES];
@@ -52,36 +66,102 @@ class MicroBitAudioProcessor : public DataSink, public DataSource
 
     private:
 
-    MicroBit& uBit;
+    DataSource &audiostream; // the stream of data to analyse
+    DataSink *recogniser; // the recogniser the frequencies should be sent to
+    uint16_t audio_samples_number; // the number of samples to collect before analysing a frame
+    arm_rfft_fast_instance_f32 fft_instance; // the instance of CMSIS fft that is used to run fft
+    float *buf; // the buffer to store the incoming data
+    float *fft_output; // an array to store the result of the fft
+    float *mag; // an array to store the magnitudes of the frequencies
 
-    DataSource &audiostream;
-    DataSink *recogniser;
-    int zeroOffset; // unsigned value that is the best effort guess of the zero point of the data source
-    int divisor; // Used for LINEAR modes
-    uint16_t audio_samples_number;
-    arm_rfft_fast_instance_f32 fft_instance;
-    float *buf;
-    float *fft_output;
-    float *mag;
+    uint16_t buf_len; // the length of the incoming buffer
+    bool recording; // whether it should analyse the data or be idle
 
-    uint16_t position;
-    bool recording;
-
-    AudioFrameAnalysis output;
+    AudioFrameAnalysis output; // the result of the analysis
 
+    /*
+     * Converts from frequency to the index in the array.
+     *
+     * @param freq a frequency in the range 0 - 5000 Hz.
+     *
+     * @return the index to the frequency bucket freq is in
+     *         as it comes out of the fft
+     */
     uint16_t frequencyToIndex(int freq);
+
+    /*
+     * Converts from the index in the array to frequency.
+     *
+     * @param index an index in the range 0 - audio_samples_number / 2.
+     *
+     * @return the average frequency in the bucket
+     */
     float32_t indexToFrequency(int index);
 
     public:
-    MicroBitAudioProcessor(DataSource& source, MicroBit& uBit, uint16_t audio_samples_number = DEFAULT_AUDIO_SAMPLES_NUMBER);
+
+    /*
+     * Constructor.
+     *
+     * Initialize the MicroBitAudioProcessor.
+     */
+    MicroBitAudioProcessor(DataSource& source, uint16_t audio_samples_number = DEFAULT_AUDIO_SAMPLES_NUMBER);
+
+    /*
+     * Destructor.
+     *
+     * Deallocates all the memory allocated dynamically.
+     */
     ~MicroBitAudioProcessor();
+
+    /*
+     * A callback for when the data is ready.
+     *
+     * Analyses the data when enough of it comes in, using
+     * the following algorithm:
+     *
+     * The audio processor accumulates microphone data as it comes
+     * in and, after getting audio_samples_number of them, it processes
+     * the frame.
+     *
+     * It transforms the data from the time domain to the frequency domain
+     * using the CMSIS fft.
+     *
+     * If the mean of the magnitudes of frequencies is lower than
+     * ANALYSIS_MEAN_THRESHOLD or the standard deviation (std) is
+     * lower than ANALYSIS_STD_THRESHOLD then the frame is considered
+     * silence - no fundamental frequency.
+     *
+     * It then filters out the frequencies that have a magnitude lower
+     * than the mean + ANALYSIS_STD_MULT_THRESHOLD * std. This ensures
+     * that only outlier frequencies are being considered.
+     *
+     * It then filters out the neighbour frequencies around the peaks.
+     *
+     * Some of these operations are implemented together to optimize the
+     * algorithm.
+     */
     virtual int pullRequest();
+
+    /*
+     * Allow our downstream component to register itself with us.
+     */
     void connect(DataSink *downstream);
+
+    /*
+     * Provides the next available data to the downstream caller.
+     */
     virtual ManagedBuffer pull();
 
-    int setDivisor(int d);
+    /*
+     * Starts recording and analysing.
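 *
 * Hedged usage sketch (`fft` is an invented name for an instance of this
 * class; a recogniser normally drives these calls for you):
 *
 *     fft->startRecording();   // begin accumulating samples and analysing frames
 *     ...
 *     fft->stopRecording();    // go idle; no further analyses are produced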
+ */ void startRecording(); - void stopRecording(MicroBit& uBit); + + /* + * Stops from recording and analysing. + */ + void stopRecording(); }; #endif \ No newline at end of file diff --git a/inc/MicroBitSoundRecogniser.h b/inc/MicroBitSoundRecogniser.h index 4d6d1bde..e4d1fe10 100644 --- a/inc/MicroBitSoundRecogniser.h +++ b/inc/MicroBitSoundRecogniser.h @@ -15,7 +15,6 @@ class MicroBitSoundRecogniser : public DataSink { private: MicroBitAudioProcessor& audio_proceesor; - MicroBit& uBit; bool analysing; @@ -44,12 +43,9 @@ class MicroBitSoundRecogniser : public DataSink }; struct Sound { - Sound(uint8_t size, uint8_t max_deviation, uint8_t max_history_len, bool consider_all_frequencies, MicroBit& ubit); + Sound(uint8_t size, uint8_t max_deviation, uint8_t max_history_len, bool consider_all_frequencies); ~Sound(); - // debuging only - MicroBit& ubit; - bool consider_all_frequencies; uint8_t max_deviation; uint8_t size; @@ -75,7 +71,7 @@ class MicroBitSoundRecogniser : public DataSink uint8_t max_history_len; }; - MicroBitSoundRecogniser(MicroBitAudioProcessor& processor, MicroBit& uBit); + MicroBitSoundRecogniser(MicroBitAudioProcessor& processor); Sound** sounds; ManagedString** sounds_names; diff --git a/source/EmojiRecogniser.cpp b/source/EmojiRecogniser.cpp index a9f7b8c6..c0e6f467 100644 --- a/source/EmojiRecogniser.cpp +++ b/source/EmojiRecogniser.cpp @@ -1,23 +1,22 @@ #include "EmojiRecogniser.h" -EmojiRecogniser::EmojiRecogniser( MicroBitAudioProcessor& processor, - MicroBit& uBit) - : MicroBitSoundRecogniser(processor, uBit){ +EmojiRecogniser::EmojiRecogniser( MicroBitAudioProcessor& processor ) + : MicroBitSoundRecogniser(processor){ sounds = new Sound* [5]; sounds_names = new ManagedString* [5]; - addHappySound(uBit); - addHelloSound(uBit); - addSadSound(uBit); - addSoaringSound(uBit); - addTwinkleSound(uBit); + addHappySound(); + addHelloSound(); + addSadSound(); + addSoaringSound(); + addTwinkleSound(); } // HAPPY Sound ---- const uint8_t happy_sequences = 2; -const uint8_t happy_max_deviations = 3; +const uint8_t happy_max_deviations = 2; uint16_t happy_samples[happy_sequences][3][11] = { // First sequence @@ -50,13 +49,13 @@ uint8_t happy_nr_samples[happy_sequences] = { }; -void EmojiRecogniser::addHappySound( MicroBit& ubit) { +void EmojiRecogniser::addHappySound() { uint8_t it = sounds_size; sounds_size ++; sounds_names[it] = new ManagedString("happy"); - sounds[it] = new Sound(happy_sequences, happy_max_deviations, 14, true, ubit); + sounds[it] = new Sound(happy_sequences, happy_max_deviations, 14, true); for(uint8_t i = 0; i < happy_sequences; i++){ sounds[it] -> sequences[i] = new SoundSequence(happy_nr_samples[i], happy_thresholds[i], happy_deviations[i]); @@ -102,12 +101,12 @@ uint8_t hello_nr_samples[hello_sequences] = { }; -void EmojiRecogniser::addHelloSound( MicroBit& ubit) { +void EmojiRecogniser::addHelloSound() { uint8_t it = sounds_size; sounds_size ++; sounds_names[it] = new ManagedString("hello"); - sounds[it] = new Sound(hello_sequences, hello_max_deviations, 12, true, ubit); + sounds[it] = new Sound(hello_sequences, hello_max_deviations, 12, true); for(uint8_t i = 0; i < hello_sequences; i++){ sounds[it] -> sequences[i] = new SoundSequence(hello_nr_samples[i], hello_thresholds[i], hello_deviations[i]); @@ -152,12 +151,12 @@ uint8_t sad_nr_samples[sad_sequences] = { }; -void EmojiRecogniser::addSadSound( MicroBit& ubit) { +void EmojiRecogniser::addSadSound() { uint8_t it = sounds_size; sounds_size ++; sounds_names[it] = new ManagedString("sad"); - 
sounds[it] = new Sound(sad_sequences, sad_max_deviations, 18, true, ubit); + sounds[it] = new Sound(sad_sequences, sad_max_deviations, 18, true); for(uint8_t i = 0; i < sad_sequences; i++){ sounds[it] -> sequences[i] = new SoundSequence(sad_nr_samples[i], sad_thresholds[i], sad_deviations[i]); @@ -169,59 +168,70 @@ void EmojiRecogniser::addSadSound( MicroBit& ubit) { // SOARING Sound ---- -const uint8_t soaring_sequences = 4; -const uint8_t soaring_max_deviations = 15; +const uint8_t soaring_sequences = 7; +const uint8_t soaring_max_deviations = 7; -uint16_t soaring_samples[soaring_sequences][3][10] = { - // First sequence +uint16_t soaring_samples[soaring_sequences][1][7] = { { - { 9, 2499, 2499, 2814, 2688, 2646, 2646, 2898, 2898, 3759}, - { 9, 2520, 2520, 2835, 2667, 2667, 2898, 2898, 2898, 3759}, - { 9, 2499, 2499, 2814, 2814, 2646, 2646, 2898, 2898, 3759} - }, + { 5, 4179, 4179, 4179, 4179, 4179} + }, { - { 7, 3759, 3003, 2646, 2688, 2667, 2667, 4599}, - { 7, 3003, 3003, 2646, 2646, 2667, 2667, 4599}, - { 7, 3759, 3003, 3003, 2646, 2646, 2667, 4599} - }, + { 5, 4284, 4284, 4284, 4284, 4284} + }, { - { 4, 3528, 2625, 2625, 3507}, - { 4, 4599, 2625, 2625, 3507}, - { 4, 4599, 2667, 3528, 3507} - }, + { 5, 4389, 4389, 4389, 4389, 4389} + }, { - { 7, 3528, 3927, 2499, 2499, 2499, 2646, 2646} - } + { 5, 4494, 4494, 4494, 4494, 4494} + }, + { + { 5, 4599, 4599, 4599, 4599, 4599} + }, + { + { 5, 4704, 4704, 4704, 4704, 4704} + }, + { + { 5, 4809, 4809, 4809, 4809, 4809} + } }; const uint16_t soaring_thresholds[soaring_sequences] = { - 150, + 100, + 100, + 100, + 100, 100, 100, 100 }; const uint8_t soaring_deviations[soaring_sequences] = { - 5, - 5, - 5, - 5 -}; - -const uint8_t soaring_nr_samples[soaring_sequences] = { 3, 3, 3, + 3, + 3, + 3, + 3 +}; + +const uint8_t soaring_nr_samples[soaring_sequences] = { + 1, + 1, + 1, + 1, + 1, + 1, 1 }; -void EmojiRecogniser::addSoaringSound( MicroBit& ubit) { +void EmojiRecogniser::addSoaringSound() { uint8_t it = sounds_size; sounds_size ++; sounds_names[it] = new ManagedString("soaring"); - sounds[it] = new Sound(soaring_sequences, soaring_max_deviations, 15, true, ubit); + sounds[it] = new Sound(soaring_sequences, soaring_max_deviations, 15, true); for(uint8_t i = 0; i < soaring_sequences; i++){ sounds[it] -> sequences[i] = new SoundSequence(soaring_nr_samples[i], soaring_thresholds[i], soaring_deviations[i]); @@ -233,50 +243,64 @@ void EmojiRecogniser::addSoaringSound( MicroBit& ubit) { // TWINKLE Sound ---- -const uint8_t twinkle_sequences = 3; -const uint8_t twinkle_max_deviations = 4; +const uint8_t twinkle_sequences = 4; +const uint8_t twinkle_max_deviations = 5; -uint16_t twinkle_samples[twinkle_sequences][2][7] = { +uint16_t twinkle_samples[twinkle_sequences][5][8] = { // First sequence - { - { 4, 2163, 1953, 2604, 2604}, - { 4, 2163, 2163, 2604, 2604} - }, - { - { 5, 2436, 2163, 2604, 2310, 2709}, - { 5, 2436, 2163, 2604, 2604, 2709} - }, - { - { 6, 0, 2604, 2604, 2436, 2520, 2604}, - { 6, 0, 2604, 2604, 2436, 2898, 2604} + { + { 5, 1827, 2709, 3612, 4053, 4809}, + { 6, 1827, 2709, 2709, 3612, 4053, 4809}, + { 6, 1827, 2730, 3612, 4053, 4053, 4809}, + { 6, 1827, 2709, 3591, 3612, 4053, 4809} + }, + { + { 7, 4788, 4767, 4473, 4473, 4011, 3570, 3339}, + { 6, 4788, 4767, 4473, 4032, 3570, 3360}, + { 7, 4809, 4767, 4473, 4032, 3570, 3570, 3339} + }, + { + { 7, 1827, 2625, 2373, 2226, 2016, 2016, 1785}, + { 7, 1827, 1827, 2604, 2373, 2226, 2016, 1785}, + { 7, 1827, 1827, 2373, 2373, 2226, 2016, 1785}, + { 7, 4116, 1827, 2394, 2373, 2226, 2016, 
1785}, + { 6, 4137, 2688, 2373, 2226, 2016, 1785} + }, + { + { 6, 2982, 2982, 2688, 2373, 2226, 2016}, + { 6, 3360, 2982, 2688, 2667, 2373, 2226}, + { 6, 3339, 2982, 2982, 2688, 2373, 2226} } }; const uint16_t twinkle_thresholds[twinkle_sequences] = { + 80, 80, 80, 80 }; const uint8_t twinkle_deviations[twinkle_sequences] = { - 2, + 3, + 3, 3, 3 }; const uint8_t twinkle_nr_samples[twinkle_sequences] = { - 2, - 2, - 2 + 4, + 3, + 5, + 3 }; -void EmojiRecogniser::addTwinkleSound( MicroBit& ubit) { +void EmojiRecogniser::addTwinkleSound() { uint8_t it = sounds_size; sounds_size ++; sounds_names[it] = new ManagedString("twinkle"); - sounds[it] = new Sound(twinkle_sequences, twinkle_max_deviations, 15, true, ubit); + sounds[it] = new Sound(twinkle_sequences, twinkle_max_deviations, 11, true); for(uint8_t i = 0; i < twinkle_sequences; i++){ sounds[it] -> sequences[i] = new SoundSequence(twinkle_nr_samples[i], twinkle_thresholds[i], twinkle_deviations[i]); diff --git a/source/MicroBitAudioProcessor.cpp b/source/MicroBitAudioProcessor.cpp index 80e639af..4eea1457 100644 --- a/source/MicroBitAudioProcessor.cpp +++ b/source/MicroBitAudioProcessor.cpp @@ -24,17 +24,17 @@ DEALINGS IN THE SOFTWARE. #include #include - +/* + * Constructor. + * + * Initialize the MicroBitAduioProcessor. + */ MicroBitAudioProcessor::MicroBitAudioProcessor( DataSource& source, - MicroBit& uBit, uint16_t audio_samples_number) : audiostream(source), recogniser(NULL), - uBit(uBit), audio_samples_number(audio_samples_number) { - divisor = 1; - arm_rfft_fast_init_f32(&fft_instance, audio_samples_number); /* Double Buffering: We allocate twice the number of samples*/ @@ -44,10 +44,10 @@ MicroBitAudioProcessor::MicroBitAudioProcessor( DataSource& source, memset(buf, 0, sizeof(buf)); - position = 0; + buf_len = 0; recording = false; - if (buf == NULL || fft_output == NULL || mag == NULL || played == NULL) { + if (buf == NULL || fft_output == NULL || mag == NULL) { DMESG("DEVICE_NO_RESOURCES"); target_panic(DEVICE_OOM); } @@ -55,6 +55,11 @@ MicroBitAudioProcessor::MicroBitAudioProcessor( DataSource& source, audiostream.connect(*this); } +/* + * Destructor. + * + * Deallocates all the memory allocated dynamically. + */ MicroBitAudioProcessor::~MicroBitAudioProcessor() { delete buf; @@ -62,23 +67,74 @@ MicroBitAudioProcessor::~MicroBitAudioProcessor() delete mag; } +/* + * Converts from frequency to the index in the array. + * + * @param freq a frequency in the range 0 - 5000 Hz. + * + * @return the index to the frequency bucket freq is in + * as it comes out of the fft + */ uint16_t MicroBitAudioProcessor::frequencyToIndex(int freq) { return (freq / ((uint32_t)MIC_SAMPLE_RATE / audio_samples_number)); } +/* + * Converts from the index in the array to frequency. + * + * @param index a index in the range 0 - audio_samples_number / 2. + * + * @return the avg frequency in the bucket + */ float32_t MicroBitAudioProcessor::indexToFrequency(int index) { return ((uint32_t)MIC_SAMPLE_RATE / audio_samples_number) * index; } + +/* + * Allow out downstream component to register itself with us + */ void MicroBitAudioProcessor::connect(DataSink *downstream){ recogniser = downstream; } + +/* + * Provides the next available data to the downstream caller. + */ ManagedBuffer MicroBitAudioProcessor::pull() { return ManagedBuffer(((uint8_t *) (&output)), (int) sizeof(AudioFrameAnalysis)); } + +/* + * A callback for when the data is ready. 
+ * + * Analyses the data when enough of it comes in, using + * the following algorithm: + * + * The audio processor accumulates microphone data as it comes + * in and after getting audio_samples_number of them it process + * the frame. + * + * It transforms the date from time domain to frequency domain + * using the CMSIS fft. + * + * If the mean of the magnitudes of frequencies is lower than + * ANALYSIS_MEAN_THRESHOLD or the standard deviation (std) is + * lower than ANALYSIS_STD_THRESHOLD then the frame is considered + * silence - no fundamental frequency. + * + * It then filters out the frequencies that have the magnitude lower + * than the mean + ANALYSIS_STD_MULT_THRESHOLD * std. This ensures + * that only outlier frequencies are being considered. + * + * It then filters out the neighbour frequencies around the peaks. + * + * Some of these operations are implemented together to optimize the + * algorithm. + */ int MicroBitAudioProcessor::pullRequest() { @@ -102,12 +158,12 @@ int MicroBitAudioProcessor::pullRequest() result = s; data++; - buf[position++] = (float)result; + buf[buf_len++] = (float)result; - if (!(position % audio_samples_number)) + if (!(buf_len % audio_samples_number)) { - position = 0; + buf_len = 0; uint16_t from = frequencyToIndex(RECOGNITION_START_FREQ); uint16_t to = min(frequencyToIndex(RECOGNITION_END_FREQ), audio_samples_number / 2); @@ -115,10 +171,6 @@ int MicroBitAudioProcessor::pullRequest() arm_rfft_fast_f32(&fft_instance, buf , fft_output, 0); arm_cmplx_mag_f32(&fft_output[0], mag, to); - float32_t maxFreq = 0; - uint32_t index; - arm_max_f32(&mag[5], (uint32_t)to - 5, &maxFreq, &index); - float32_t mean = 0; float32_t std = 0; @@ -127,10 +179,8 @@ int MicroBitAudioProcessor::pullRequest() float32_t threshold = mean + std * ANALYSIS_STD_MULT_THRESHOLD; - memset(played, 0, sizeof(played)); output.size = 0; - if(std > ANALYSIS_STD_THRESHOLD && mean > ANALYSIS_MEAN_THRESHOLD) { std::vector> freq_played; @@ -172,20 +222,19 @@ int MicroBitAudioProcessor::pullRequest() } -int MicroBitAudioProcessor::setDivisor(int d) -{ - divisor = d; - return DEVICE_OK; -} - - +/* + * Starts recording and analysing. + */ void MicroBitAudioProcessor::startRecording() { this->recording = true; DMESG("START RECORDING"); } -void MicroBitAudioProcessor::stopRecording(MicroBit& uBit) +/* + * Stops from recording and analysing. 
+ */ +void MicroBitAudioProcessor::stopRecording() { this->recording = false; DMESG("STOP RECORDING"); diff --git a/source/MicroBitMorseCodeRecogniser.cpp b/source/MicroBitMorseCodeRecogniser.cpp index 9eb92fda..f5dd20c0 100644 --- a/source/MicroBitMorseCodeRecogniser.cpp +++ b/source/MicroBitMorseCodeRecogniser.cpp @@ -40,7 +40,7 @@ void MicroBitMorseCodeRecogniser::startAnalisying(void (*_callback)(ManagedStrin void MicroBitMorseCodeRecogniser::stopAnalisying(){ analysing = false; buffer_len = 0; - audio_proceesor.stopRecording(uBit); + audio_proceesor.stopRecording(); } diff --git a/source/MicroBitSoundRecogniser.cpp b/source/MicroBitSoundRecogniser.cpp index a3ca5f53..b8f96d4c 100644 --- a/source/MicroBitSoundRecogniser.cpp +++ b/source/MicroBitSoundRecogniser.cpp @@ -1,8 +1,8 @@ #include "MicroBitSoundRecogniser.h" -MicroBitSoundRecogniser::MicroBitSoundRecogniser(MicroBitAudioProcessor& audio_processor, MicroBit& uBit) - : audio_proceesor(audio_processor), uBit(uBit) { +MicroBitSoundRecogniser::MicroBitSoundRecogniser(MicroBitAudioProcessor& audio_processor) + : audio_proceesor(audio_processor){ analysing = false; audio_proceesor.connect(this); buffer_len = 0; @@ -34,12 +34,6 @@ int MicroBitSoundRecogniser::pullRequest(){ buffer[buffer_len].buf[i] = buf[0].buf[i]; buffer_len ++; - - for(int i=0; i resetHistory(); } @@ -108,9 +102,9 @@ MicroBitSoundRecogniser::SoundSequence::~SoundSequence() { delete [] samples; } -MicroBitSoundRecogniser::Sound::Sound(uint8_t size, uint8_t max_deviation, uint8_t max_history_len, bool consider_all_frequencies, MicroBit& ubit) +MicroBitSoundRecogniser::Sound::Sound(uint8_t size, uint8_t max_deviation, uint8_t max_history_len, bool consider_all_frequencies) : size(size), max_deviation(max_deviation), history_len(0), - max_history_len(max_history_len), consider_all_frequencies(consider_all_frequencies), ubit(ubit) { + max_history_len(max_history_len), consider_all_frequencies(consider_all_frequencies) { sequences = new SoundSequence* [size]; history = new uint8_t[2 * max_history_len * size]; } @@ -127,15 +121,12 @@ void MicroBitSoundRecogniser::Sound::update(MicroBitAudioProcessor::AudioFrameAn for(uint8_t seq_it = 0; seq_it < size; seq_it ++) { uint8_t x = matchSequence(seq_it, buffer, buffer_len); addToHistory(seq_it, x); - // if(x <= max_deviation) - // ubit.serial.send(ManagedString("matched seq ") + ManagedString((int)seq_it) + ManagedString(" with dev ") + ManagedString((int) x) + ManagedString("\n")); } endHistoryFrame(); } bool MicroBitSoundRecogniser::Sound::matched() { if(getDeviation(1, size - 1) <= max_deviation){ - // ubit.serial.send(ManagedString((int)getDeviation(1, size - 1)) + ManagedString("\n")); history_len = 0; return true; } @@ -158,10 +149,6 @@ uint8_t MicroBitSoundRecogniser::Sound::matchSequence(uint8_t seq_id, else if (seq_id && deviation > getDeviation(sample_len + 1, seq_id - 1)) deviation = getDeviation(sample_len, seq_id - 1); - // ubit.serial.send(ManagedString("match init dev: ")); - // ubit.serial.send(ManagedString((int) deviation)); - // ubit.serial.send(ManagedString("\n ")); - if(deviation > max_deviation || deviation >= min_dev) continue; uint32_t diff = 0; @@ -193,11 +180,6 @@ uint8_t MicroBitSoundRecogniser::Sound::matchSequence(uint8_t seq_id, } } - // ubit.serial.send(ManagedString("match end dev: ")); - // ubit.serial.send(ManagedString((int) deviation)); - // ubit.serial.send(ManagedString("\n ")); - - if(deviation < min_dev && deviation <= max_deviation) min_dev = deviation; From 
7415cf37ecf80a9c0edc8504771152c5824da5aa Mon Sep 17 00:00:00 2001
From: Vlad Turcuman
Date: Sat, 1 May 2021 18:40:31 +0100
Subject: [PATCH 4/5] Added comments to the sound recogniser

---
 inc/MicroBitSoundRecogniser.h      | 196 ++++++++++++++++++++++++-----
 source/MicroBitSoundRecogniser.cpp | 118 ++++++++++++++---
 2 files changed, 267 insertions(+), 47 deletions(-)

diff --git a/inc/MicroBitSoundRecogniser.h b/inc/MicroBitSoundRecogniser.h
index e4d1fe10..1b419eb3 100644
--- a/inc/MicroBitSoundRecogniser.h
+++ b/inc/MicroBitSoundRecogniser.h
@@ -7,84 +7,222 @@
 #include "MicroBitAudioProcessor.h"
 #include "arm_math.h"
 
-// Should be the minimum number of elements needed in the buffer
-// at any time
-#define HISTORY_LENGTH 50
+/*
+ *
+ * The sound recogniser takes in data from the audio processor - in
+ * the form of AudioFrameAnalysis. It then tries to match the history
+ * against samples of sounds.
+ *
+ *
+ * Sound fingerprint
+ *
+ * A sound has multiple sequences, each sequence having multiple samples
+ * to account for the randomness.
+ *
+ * Each sequence also has:
+ *      a threshold - the maximum absolute difference between the sampled
+ *              frequency and the heard frequency.
+ *      maximum number of deviations - the maximum number of datapoints
+ *              that can be more than the threshold away from the
+ *              sampled frequency
+ *
+ * A sound also has a maximum number of deviations - the total maximum
+ * deviations across all sequences.
+ */
+
+
+/*
+ * The maximum length of the buffer.
+ *
+ * It should be at least the maximum number of datapoints in
+ * any sample.
+ */
+#define HISTORY_LENGTH 30
 
 class MicroBitSoundRecogniser : public DataSink
 {
     private:
-    MicroBitAudioProcessor& audio_proceesor;
+
+    MicroBitAudioProcessor& audio_proceesor; // the stream of data from the audio processor to analyse
 
-    bool analysing;
+    bool analysing; // whether it should analyse the data or be idle
 
-    void (*callback)(ManagedString) = NULL;
+    void (*callback)(ManagedString) = NULL; // the callback function when a sound is detected
 
-    MicroBitAudioProcessor::AudioFrameAnalysis buffer[2 * HISTORY_LENGTH];
-    uint8_t buffer_len;
+    MicroBitAudioProcessor::AudioFrameAnalysis buffer[2 * HISTORY_LENGTH]; // the buffer to collect the incoming data in
+    uint8_t buffer_len; // the length of the buffer
 
     protected:
+
+    /*
+     * A struct to package a sample.
+     */
     struct SoundSample {
         SoundSample(const uint16_t* _frames, uint8_t size);
         ~SoundSample();
 
-        uint8_t size;
-        uint16_t* frames;
+        uint8_t size; // the number of data points in the sample
+        const uint16_t* frames; // the data points
     };
 
+
+    /*
+     * A struct to package a sequence.
+     */
     struct SoundSequence {
         SoundSequence(uint8_t size, uint32_t threshold, uint8_t deviation);
         ~SoundSequence();
 
-        uint8_t size;
-        uint32_t threshold;
-        uint8_t deviation;
-        SoundSample** samples;
+        uint8_t size; // the number of samples
+        uint32_t threshold; // the threshold for the sequence
+        uint8_t deviation; // the maximum number of deviations allowed for the sequence
+        SoundSample** samples; // pointer to the array of samples
    };
 
+
+    /*
+     * A struct to package a sound.
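 *
 * The data that feeds a Sound comes from per-sound tables like the happy_*
 * arrays in EmojiRecogniser.cpp. A minimal, purely hypothetical fingerprint
 * (demo_* names and values are invented for illustration; the first element
 * of each sample row is the number of data points that follow, as in the
 * happy_/soaring_/twinkle_ tables):
 *
 *     const uint8_t  demo_sequences      = 1;
 *     const uint8_t  demo_max_deviations = 2;
 *     uint16_t demo_samples[demo_sequences][2][5] = {
 *         {
 *             { 4, 2499, 2604, 2709, 2814 },
 *             { 4, 2520, 2604, 2730, 2835 }
 *         }
 *     };
 *     const uint16_t demo_thresholds[demo_sequences] = { 100 };
 *     const uint8_t  demo_deviations[demo_sequences] = { 2 };
 *     const uint8_t  demo_nr_samples[demo_sequences] = { 2 };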
+ */ struct Sound { Sound(uint8_t size, uint8_t max_deviation, uint8_t max_history_len, bool consider_all_frequencies); ~Sound(); - bool consider_all_frequencies; - uint8_t max_deviation; - uint8_t size; - SoundSequence** sequences; + bool consider_all_frequencies; // whether or not to consider all frequencies detected or just the dominant one + uint8_t max_deviation; // the maximum total number of deviations allowed + uint8_t size; // the number of sequences in the sound + SoundSequence** sequences; // pointer to the array of sequences - void update( MicroBitAudioProcessor::AudioFrameAnalysis* buffer, - uint8_t buffer_len ); - bool matched(); - void resetHistory(); + /* + * Update called when new data comes in. + * + * @param buffer the buffer with the last data points that came in + * + * @note This keeps the history updated which is needed for the + * dynamic programming approach chosen for matching + */ + void update( MicroBitAudioProcessor::AudioFrameAnalysis* buffer, + uint8_t buffer_len ); + + /* + * Whether or not the sound matched in the last frame. + * + * @return whether or not the sound matched in the last frame. + * + * @note Should be called after update when new data comes in. + */ + bool matched(); + + /* + * Resets the history buffer. + * + * @note Used when the data stops coming in - e.g. when the analyser + * is paused + */ + void resetHistory(); private: + /* + * Matches a sequence to the last couple of data points in the buffer. + * + * @param seq_id the id of the sequence to try to match + * + * @param buffer the buffer of data points that came in + * + * @param buffer_len the length of the buffer + * + * @return the number of deviations in the last data points to the sound up to + * the seq_id sequence or 255 if it's above the maximums allowed. + */ uint8_t matchSequence( uint8_t seq_id, MicroBitAudioProcessor::AudioFrameAnalysis* buffer, uint8_t buffer_len) const; + /* + * Getter for the internal history buffer of the sound. + * + * @param frames_ago the number of frames ago for which the + * query was made + * + * @param seq_id the id of the sequence to get the deviation for + * + * @return the deviation (or 255 if it didn't match) up to the + * sequence seq_id that was frames_ago frames ago. + * + * @note used to check if the sound matched up to the first seq_id + * sequences so that the matching doesn't need to recheck + * those sequences. + */ uint8_t getDeviation(uint8_t frames_ago, uint8_t seq_id) const; + + /* + * Adds to the history buffer a deviation for a certain sequence. + * + * @param seq_id the id of the sequence to add the value to. + * + * @param value the value to be added to the history buffer + */ void addToHistory(uint8_t seq_id, uint8_t value); + + /* + * Ends a history frame, increasing the length of the buffer. + */ void endHistoryFrame(); - uint8_t* history; - uint8_t history_len; - uint8_t max_history_len; + uint8_t* history; // the array of to keep the history buffer in + uint8_t history_len; // the size of the history buffer + uint8_t max_history_len; // the maximum length of the history buffer. Used for double buffering }; + /* + * Constructor. + * + * Initialize the MicroBitSoundRecogniser. + * + * @note is private to make the class abstract. 
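 *
 * A subclass is expected to look roughly like EmojiRecogniser; a hedged
 * sketch (SirenRecogniser and addSirenSound are invented names):
 *
 *     class SirenRecogniser : public MicroBitSoundRecogniser {
 *         void addSirenSound();
 *       public:
 *         SirenRecogniser(MicroBitAudioProcessor& processor)
 *             : MicroBitSoundRecogniser(processor) {
 *             sounds       = new Sound*[1];
 *             sounds_names = new ManagedString*[1];
 *             addSirenSound();   // fills sounds[0] / sounds_names[0] and bumps sounds_size
 *         }
 *     };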
+ */ MicroBitSoundRecogniser(MicroBitAudioProcessor& processor); - Sound** sounds; - ManagedString** sounds_names; - uint8_t sounds_size; + Sound** sounds; // pointer to the array of sounds to recognise + ManagedString** sounds_names; // pointer to the array of sound names + uint8_t sounds_size; // the number of sounds to try to recognise public: + + /* + * Destructor. + * + * Deallocates all the memory allocated dynamically. + */ ~MicroBitSoundRecogniser(); + + /* + * A callback for when the data is ready. + */ virtual int pullRequest(); - MicroBitAudioProcessor* getAudioProcessor(); + /* + * Sets the callback. + * + * @TODO change it to send a message on the message bus + * rather than having a callback + */ void setCallback(void (*_callback)(ManagedString)); + + /* + * Starts analysing the data that comes in. Also sets the callback. + * + * @TODO change it to send a message on the message bus + * rather than having a callback + */ void startAnalisying(void (*_callback)(ManagedString)); + + + /* + * Stops analysing the data and also stops the audio processor + * from receiving. + */ void stopAnalisying(); }; diff --git a/source/MicroBitSoundRecogniser.cpp b/source/MicroBitSoundRecogniser.cpp index b8f96d4c..76025d30 100644 --- a/source/MicroBitSoundRecogniser.cpp +++ b/source/MicroBitSoundRecogniser.cpp @@ -1,6 +1,11 @@ #include "MicroBitSoundRecogniser.h" +/* + * Constructor. + * + * Initialize the MicroBitSoundRecogniser. + */ MicroBitSoundRecogniser::MicroBitSoundRecogniser(MicroBitAudioProcessor& audio_processor) : audio_proceesor(audio_processor){ analysing = false; @@ -9,6 +14,11 @@ MicroBitSoundRecogniser::MicroBitSoundRecogniser(MicroBitAudioProcessor& audio_p sounds_size = 0; } +/* + * Destructor. + * + * Deallocates all the memory allocated dynamically. + */ MicroBitSoundRecogniser::~MicroBitSoundRecogniser(){ if(sounds_size != 0){ for(uint8_t i = 0; i < sounds_size; i++) { @@ -20,24 +30,29 @@ MicroBitSoundRecogniser::~MicroBitSoundRecogniser(){ } } + +/* + * A callback for when the data is ready. + */ int MicroBitSoundRecogniser::pullRequest(){ auto frames = audio_proceesor.pull(); if(!analysing) return DEVICE_OK; - // I only send one at a time when calling pullRequest from MicroBitAudioProcessor. - // Is there a way to make them concurrent -- might save some resources? MicroBitAudioProcessor::AudioFrameAnalysis* buf = (MicroBitAudioProcessor::AudioFrameAnalysis* ) &frames[0]; buffer[buffer_len].size = buf[0].size; for(uint8_t i = 0; i resetHistory(); } + MicroBitSoundRecogniser::SoundSample::SoundSample(const uint16_t* _frames, uint8_t size) - : size(size) { - frames = new uint16_t[size]; - memcpy(frames, _frames, sizeof(uint16_t) * size); -} + : size(size), frames(_frames) { } MicroBitSoundRecogniser::SoundSample::~SoundSample() { delete[] frames; @@ -116,6 +141,14 @@ MicroBitSoundRecogniser::Sound::~Sound() { delete [] history; } +/* + * Update called when new data comes in. + * + * @param buffer the buffer with the last data points that came in + * + * @note This keeps the history updated which is needed for the + * dynamic programming approach chosen for matching + */ void MicroBitSoundRecogniser::Sound::update(MicroBitAudioProcessor::AudioFrameAnalysis* buffer, uint8_t buffer_len){ for(uint8_t seq_it = 0; seq_it < size; seq_it ++) { @@ -125,6 +158,13 @@ void MicroBitSoundRecogniser::Sound::update(MicroBitAudioProcessor::AudioFrameAn endHistoryFrame(); } +/* + * Whether or not the sound matched in the last frame. 
+ * + * @return whether or not the sound matched in the last frame. + * + * @note Should be called after update when new data comes in. + */ bool MicroBitSoundRecogniser::Sound::matched() { if(getDeviation(1, size - 1) <= max_deviation){ history_len = 0; @@ -132,7 +172,19 @@ bool MicroBitSoundRecogniser::Sound::matched() { } return false; } - + +/* + * Matches a sequence to the last couple of data points in the buffer. + * + * @param seq_id the id of the sequence to try to match + * + * @param buffer the buffer of data points that came in + * + * @param buffer_len the length of the buffer + * + * @return the number of deviations in the last data points to the sound up to + * the seq_id sequence or 255 if it's above the maximums allowed. + */ uint8_t MicroBitSoundRecogniser::Sound::matchSequence(uint8_t seq_id, MicroBitAudioProcessor::AudioFrameAnalysis* buffer, uint8_t buffer_len) const { @@ -188,25 +240,55 @@ uint8_t MicroBitSoundRecogniser::Sound::matchSequence(uint8_t seq_id, return min_dev; } - +/* + * Getter for the internal history buffer of the sound. + * + * @param frames_ago the number of frames ago for which the + * query was made + * + * @param seq_id the id of the sequence to get the deviation for + * + * @return the deviation (or 255 if it didn't match) up to the + * sequence seq_id that was frames_ago frames ago. + * + * @note used to check if the sound matched up to the first seq_id + * sequences so that the matching doesn't need to recheck + * those sequences. + */ uint8_t MicroBitSoundRecogniser::Sound::getDeviation(uint8_t frames_ago, uint8_t seq_id) const { if(history_len < frames_ago) return 255; return history[(history_len - frames_ago) * size + seq_id]; } +/* + * Adds to the history buffer a deviation for a certain sequence. + * + * @param seq_id the id of the sequence to add the value to. + * + * @param value the value to be added to the history buffer + */ void MicroBitSoundRecogniser::Sound::addToHistory(uint8_t seq_id, uint8_t value){ history[history_len * size + seq_id] = value; } +/* + * Ends a history frame, increasing the length of the buffer. + */ void MicroBitSoundRecogniser::Sound::endHistoryFrame(){ history_len ++; - // same type of buffer as the buffer from MicroBitSoundRecogniser + // double buffering if(history_len == 2 * max_history_len) { memcpy(&history[0], &history[max_history_len * size], sizeof(uint8_t) * max_history_len * size); history_len = max_history_len; } } +/* + * Resets the history buffer. + * + * @note Used when the data stops coming in - e.g. when the analyser + * is paused + */ void MicroBitSoundRecogniser::Sound::resetHistory(){ history_len = 0; } From d15525a550262f5cbde06ece9f650d87c16a045c Mon Sep 17 00:00:00 2001 From: Vlad Turcuman Date: Sat, 1 May 2021 18:43:32 +0100 Subject: [PATCH 5/5] added comments to the emoji recogniser --- inc/EmojiRecogniser.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/inc/EmojiRecogniser.h b/inc/EmojiRecogniser.h index 62134217..16ec6277 100644 --- a/inc/EmojiRecogniser.h +++ b/inc/EmojiRecogniser.h @@ -2,6 +2,44 @@ #ifndef EMOJI_RECOGNISER_H #define EMOJI_RECOGNISER_H +/* + * + * The emoji recogniser is a subclass of sound recogniser that defines + * the actual samples for the emoji sounds. They are just parts of the + * emoji sounds that can be recognised: remain quite consistent across + * multiple plays of the sound. 
+ * + * + * Example + * + * Taking the happy sound as an example, there are a few constants defined: + * + * happy_sequences the number of sequences in the happy sound + * + * happy_max_deviations the maximum number of deviations in the + * sound - i.e. a deviation is considered + * a data point that is more than the allowed + * threshold off the sampled frequency + * + * happy_samples a 3-dimensional array with the sampled sound: + * - the first dimension is the different + * sequences + * - the second is the samples in each sequence + * - the third is the data points in each sample + * of each sequence + * + * happy_thresholds an array with the thresholds for each of the + * sequences + * + * happy_deviations an array with the maximum deviations for each + * sequence + * + * happy_nr_samples an array with the number of samples in each + * sequence + * + * All these are packaged in a Sound struct. + */ + #include "MicroBitSoundRecogniser.h" class EmojiRecogniser : public MicroBitSoundRecogniser
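Taken together, these patches leave the following public surface: MicroBitAudioProcessor(DataSource&, uint16_t), EmojiRecogniser(MicroBitAudioProcessor&), and the startAnalisying/stopAnalisying pair inherited from MicroBitSoundRecogniser. A minimal usage sketch follows; micStream, onEmoji and setup are invented names, and only the constructor signatures and the start/stop calls are taken from the patches themselves.

#include "MicroBitAudioProcessor.h"
#include "EmojiRecogniser.h"

static MicroBitSoundRecogniser* recogniser = NULL;

// called with the registered name of the matched sound, e.g. "happy" or "twinkle"
void onEmoji(ManagedString name)
{
    // react to the detected sound here
}

// micStream is assumed to be an existing DataSource of normalised microphone samples
void setup(DataSource& micStream)
{
    MicroBitAudioProcessor* fft = new MicroBitAudioProcessor(micStream, EMOJI_AUDIO_SAMPLES_NUMBER);
    recogniser = new EmojiRecogniser(*fft);
    recogniser->startAnalisying(onEmoji);   // note: 'Analisying' matches the spelling used by this API
}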