Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 71 additions & 46 deletions VoiceAssistant/wakeword/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@
import torchaudio
import torch
import numpy as np
from neuralnet.dataset import get_featurizer
from threading import Event
from array import array
from neuralnet.dataset import get_featurizer


class Listener:

def __init__(self, sample_rate=8000, record_seconds=2):
def __init__(self, sample_rate=8000, record_seconds=2, threshold = 300):
self.chunk = 1024
self.sample_rate = sample_rate
self.record_seconds = record_seconds
self.threshold = threshold
self.silent = False

self.p = pyaudio.PyAudio()
self.stream = self.p.open(format=pyaudio.paInt16,
channels=1,
Expand All @@ -25,10 +28,18 @@ def __init__(self, sample_rate=8000, record_seconds=2):
output=True,
frames_per_buffer=self.chunk)

def is_silent(self, data):
    """Return True when the chunk's peak amplitude is below the threshold.

    Args:
        data: iterable of signed sample values (``listen`` passes an
            ``array('h', ...)`` built from the raw chunk).

    Returns:
        bool: True if no sample's magnitude reaches ``self.threshold``.
        An empty chunk is treated as silent.
    """
    # Samples are signed, so a loud chunk can peak on the negative side;
    # compare magnitudes. ``default=0`` avoids ValueError on empty input.
    peak = max((abs(sample) for sample in data), default=0)
    return peak < self.threshold

def listen(self, queue):
    """Read audio chunks forever, buffering them and tracking silence.

    Each chunk read from ``self.stream`` is appended to ``queue`` and
    ``self.silent`` is updated from ``self.is_silent`` for that chunk.

    Args:
        queue: list-like object with ``append``; receives every raw chunk.
    """
    while True:
        # Read exactly once per iteration so no chunk is dropped.
        data = self.stream.read(self.chunk, exception_on_overflow=False)
        queue.append(data)
        # Reinterpret the raw bytes as signed 16-bit samples for the check.
        self.silent = bool(self.is_silent(array('h', data)))
        time.sleep(0.01)

def run(self, queue):
Expand All @@ -40,11 +51,12 @@ def run(self, queue):
class WakeWordEngine:

def __init__(self, model_file):
    """Set up the listener, the TorchScript model and the audio buffers.

    Args:
        model_file: path passed to ``torch.jit.load``.
    """
    # Build the listener once; a second construction would open another
    # audio stream and leave the first one dangling.
    self.listener = Listener(sample_rate=8000, record_seconds=2, threshold=150)
    self.model = torch.jit.load(model_file)
    self.model.eval().to('cpu')  # run on cpu
    self.featurizer = get_featurizer(sample_rate=8000)
    self.audio_q = list()  # raw chunks appended by the listener
    self.prediction = []  # scores of consecutive positive predictions

def save(self, waveforms, fname="wakeword_temp"):
wf = wave.open(fname, "wb")
Expand All @@ -64,43 +76,67 @@ def save(self, waveforms, fname="wakeword_temp"):
def predict(self, audio):
    """Run the wake-word model on the buffered audio.

    Args:
        audio: sequence of raw audio chunks; written to disk via
            ``self.save`` and loaded back with ``torchaudio.load``.

    Returns:
        tuple: ``(value, acc)`` where ``acc`` is the sigmoid of the model
        output as a Python float and ``value`` is ``acc`` rounded to
        0.0 or 1.0. The model output must hold a single element.
    """
    with torch.no_grad():
        fname = self.save(audio)
        waveform, _ = torchaudio.load(fname)
        mfcc = self.featurizer(waveform).transpose(1, 2).transpose(0, 1)

        # TODO: read from buffer instead of saving and loading file
        # waveform = torch.Tensor([np.frombuffer(a, dtype=np.int16) for a in audio]).flatten()
        # mfcc = self.featurizer(waveform).transpose(0, 1).unsqueeze(1)

        out = self.model(mfcc)
        # Compute the sigmoid once and derive both results from it.
        prob = torch.sigmoid(out)
        return torch.round(prob).item(), prob.item()

def inference_loop(self, callback, sensitivity):
"""
Loop forever, scoring the buffered audio and firing the callback.

While the listener is not silent, the audio queue is trimmed and passed
to ``self.predict``; scores of positive predictions accumulate in
``self.prediction``. When the listener reports silence and more than two
scores were collected, their average is compared with ``sensitivity``
and ``callback()`` is invoked if the average is higher.

Args:
callback: callable invoked with no arguments on activation.
sensitivity: the lower the number the more sensitive the
wakeword is to activation.

NOTE(review): this text comes from a diff view with indentation removed
and pre-change lines mixed in, so the exact nesting described above is
an assumption -- confirm against the real file.
"""
while True:
# NOTE(review): the `> 15` / `== 15` branches below call `action`, which is
# not a parameter of this method; they look like the pre-change side of
# the diff -- confirm and remove.
if len(self.audio_q) > 15: # remove part of stream
diff = len(self.audio_q) - 15
for _ in range(diff):
self.audio_q.pop(0)
action(self.predict(self.audio_q))
elif len(self.audio_q) == 15:
action(self.predict(self.audio_q))
# Only score audio while the listener hears something above its threshold.
if not self.listener.silent:
if len(self.audio_q) > 25:

# Drop the oldest chunks until at most 25 remain in the queue.
while True:
if len(self.audio_q) > 25:
self.audio_q.pop(0)
else:
break

value, acc = self.predict(self.audio_q)

# Keep a run of positive scores; any negative prediction resets the run.
if value == 1.0:
self.prediction.append(acc)
else:
self.prediction = []

time.sleep(0.05)

# NOTE(review): a second `def run` signature inside the loop body; this looks
# like residue from the pre-change file -- confirm and remove.
def run(self, action):
if self.listener.silent and len(self.prediction) > 2: #Change depending on the length of your model
avg_acc = 0
#Calculate sensitivity
for i in self.prediction:
avg_acc += i

avg_acc = avg_acc / len(self.prediction)
self.prediction = []

if avg_acc > sensitivity:
callback()

# NOTE(review): Listener.listen only ever assigns True/False to `silent`, so
# `silent > 2` is never true and this branch appears unreachable -- confirm
# the intended condition.
elif self.listener.silent > 2 and not len(self.prediction):
self.prediction = []

def run(self, callback, sensitivity):
    """Start listening and launch the inference loop in a daemon thread.

    Args:
        callback: callable forwarded to ``inference_loop``.
        sensitivity: activation threshold forwarded to ``inference_loop``.
    """
    self.listener.run(self.audio_q)
    # Pass both arguments expected by inference_loop; the previous
    # ``args=(action,)`` construction referenced an undefined name.
    thread = threading.Thread(
        target=self.inference_loop,
        args=(callback, sensitivity),
        daemon=True,
    )
    thread.start()


class DemoAction:
"""This demo action will just randomly say Arnold Schwarzenegger quotes

args: sensitivty. the lower the number the more sensitive the
args: sensitivity. the lower the number the more sensitive the
wakeword is to activation.
"""
def __init__(self, sensitivity=10):
# import stuff here to prevent engine.py from
def __init__(self):
# import stuff here to prevent engine.py from
# importing unnecessary modules during production usage
import os
import subprocess
Expand All @@ -111,7 +147,6 @@ def __init__(self, sensitivity=10):
self.subprocess = subprocess
self.detect_in_row = 0

self.sensitivity = sensitivity
folder = realpath(join(realpath(__file__), '..', '..', '..', 'fun', 'arnold_audio'))
self.arnold_mp3 = [
os.path.join(folder, x)
Expand All @@ -120,38 +155,28 @@ def __init__(self, sensitivity=10):
]

def __call__(self, prediction):
"""Count consecutive positive predictions and play a clip at the limit.

Args:
prediction: model output; a value equal to 1 counts as a detection.

NOTE(review): WakeWordEngine.inference_loop invokes its callback with no
arguments, while this signature requires ``prediction`` -- confirm which
side is current.
"""
if prediction == 1:
self.detect_in_row += 1
# NOTE(review): relies on ``self.sensitivity``; confirm __init__ still sets it.
if self.detect_in_row == self.sensitivity:
self.play()
self.detect_in_row = 0
else:
# Any non-detection breaks the streak.
self.detect_in_row = 0

def play(self):
    """Play one randomly chosen clip through the ``play`` command-line tool.

    Failures are printed rather than raised, so a missing player does not
    stop the caller.
    """
    clip = self.random.choice(self.arnold_mp3)
    command = ['play', '-v', '.1', clip]
    try:
        print("playing", clip)
        self.subprocess.check_output(command)
    except Exception as err:
        print(str(err))


if __name__ == "__main__":
    # Demo entry point: load the model, start the engine and keep the main
    # thread alive while the daemon inference thread runs.
    parser = argparse.ArgumentParser(description="demoing the wakeword engine")
    parser.add_argument('--model_file', type=str, default=None, required=True,
                        help='optimized file to load. use optimize_graph.py')
    # Sensitivity is the average-score threshold compared in inference_loop.
    parser.add_argument('--sensitivity', type=float, default=0.85, required=False,
                        help='lower value is more sensitive to activations')

    print("""\n*** Make sure you have sox installed on your system for the demo to work!!!
If you don't want to use sox, change the play function in the DemoAction class
in engine.py module to something that works with your system.\n
""")

    args = parser.parse_args()
    # Build the engine and the action exactly once.
    wakeword_engine = WakeWordEngine(args.model_file)
    action = DemoAction()

    wakeword_engine.run(callback=action, sensitivity=args.sensitivity)
    # Block forever; the worker threads are daemons and would exit otherwise.
    threading.Event().wait()