CoderBotOrg
diff --git a/audio.py b/audio.py
Lines changed: 60 additions & 133 deletions
diff --git a/conversation.py b/conversation.py
Lines changed: 63 additions & 0 deletions
@@ -52,6 +52,7 @@
 # Audio recording parameters
 RATE = 16000
 CHUNK = int(RATE / 10)  # 100ms
+# Sample format: 16-bit signed integers, i.e. 2 bytes per sample
+FORMAT = pyaudio.paInt16 
 
 MODELDIR = "/home/pi/coderbot/psmodels/"
 SOUNDDIR = "./sounds/"
@@ -67,21 +68,25 @@ def get_instance(cls):
     return cls._instance
 
   def __init__(self):
-    self.pyaudio = pyaudio.PyAudio()
+    """Create the microphone stream wrapper and the Google Speech client.
+
+    No audio device is opened here: MicrophoneStream only opens PyAudio
+    when it is entered as a context manager.
+    """
     try:
-      self.stream_in = self.pyaudio.open(format=FORMAT, channels=1, input_device_index=2, rate=RATE,
-        input=True,
-        frames_per_buffer=CHUNK_SIZE)
-      self.stream_in.start_stream()
-    except:
+      self.stream_in = self.MicrophoneStream(FORMAT, RATE, CHUNK)
+    except Exception as e:
+      # Report the cause through logging instead of printing to stdout.
+      logging.info("Audio: %s", e)
       logging.info("Audio: input stream not available")
 
     self._google_speech_client = speech.SpeechClient()
 
   def exit(self):
+    """Shutdown hook; currently does nothing."""
     # cleanup stuff.
-    self.stream_in.close()  
-    self.pyaudio.terminate()
+    # Nothing to release here: the stream wrapper stops and terminates
+    # PyAudio in its own __exit__.
 
   def say(self, what, locale='en'):
     if what and "$" in what:
@@ -92,50 +97,38 @@ def say(self, what, locale='en'):
   def normalize(self, snd_data):
-    "Average the volume out"
+    """Scale the samples so that the loudest one reaches MAXIMUM.
+
+    snd_data: sequence of signed 16-bit sample values.
+    Returns a new array('h'); empty or all-zero input is returned unscaled.
+    """
     MAXIMUM = 16384
-    #times = float(MAXIMUM) / audioop.rms(snd_data, 2)
-    times = float(MAXIMUM)/max(abs(i) for i in snd_data)
+    peak = max([abs(i) for i in snd_data] or [0])
+    if peak == 0:
+      # No signal at all: max() of nothing or dividing by a zero peak would raise.
+      return array('h', snd_data)
+    times = float(MAXIMUM) / peak
     logging.info("times: " + str(times))
 
-    r = array('h')
-    for i in snd_data:
-        r.append(int(i*times))
+    r = array('h', [int(i * times) for i in snd_data])
     return r
 
   def record(self, elapse):
-    num_silent = 0
-    snd_started = False
-    c = 0
-
-    r = array('h')
+    """Capture raw audio from the microphone for roughly `elapse` seconds.
+
+    The elapsed time is checked after each chunk, so the recording can run
+    slightly longer than requested.
+    Returns a bytearray holding the concatenated chunks from the stream.
+    """
+    r = bytearray()
 
     t = time.time()
-    while time.time() - t < elapse:
-      try:
-        snd_data = array('h', self.stream_in.read(CHUNK_SIZE))
-        r.extend(snd_data)
-      except IOError as ex:
-        if ex[1] != pyaudio.paInputOverflowed:
-          raise
-        #buf = '\x00' * CHUNK_SIZE #white noise
-        logging.info("white noise")
-
-
-    logging.info("read: " + str(len(r)) + " elapse: " + str(time.time() - t))
-
-
-    sample_width = self.pyaudio.get_sample_size(FORMAT)
+    with self.stream_in as stream:
+      for content in stream.generator():
+        r.extend(content)
+        if time.time() - t >= elapse:
+          break
 
-    r = self.normalize(r)
-
-    return sample_width, r
+    # Reached on timeout and when the generator ends early, so the caller
+    # always gets the buffer instead of an implicit None.
+    return r
-
   def record_to_file(self, filename, elapse):
-    sample_width, data = self.record(elapse)
-    data = pack('<' + ('h'*len(data)), *data)
+    """Record `elapse` seconds from the microphone into a mono WAV file.
+
+    filename: name of the file, created under SOUNDDIR.
+    elapse: recording length in seconds, passed to record().
+    """
+    data = self.record(elapse)
 
     wf = wave.open(SOUNDDIR + filename, 'wb')
     wf.setnchannels(1)
-    wf.setsampwidth(sample_width)
+    # self.pyaudio is no longer created in __init__, so use PyAudio's
+    # module-level helper to get the sample width for FORMAT.
+    wf.setsampwidth(pyaudio.get_sample_size(FORMAT))
     wf.setframerate(RATE)
     wf.writeframes(data)
     wf.close()
@@ -155,44 +148,31 @@ def play(self, filename):
                 output = True)
 
     # read data (based on the chunk size)
-    data = wf.readframes(CHUNK_SIZE)
+    data = wf.readframes(CHUNK)
 
     # play stream (looping from beginning of file to the end)
     while data != '':
       # writing to the stream is what *actually* plays the sound.
       stream.write(data)
-      data = wf.readframes(CHUNK_SIZE)
+      data = wf.readframes(CHUNK)
       logging.info("play")
 
     # cleanup stuff.
     stream.close()    
     """
 
   def hear(self, level, elapse=1.0):
-    sig_hear = False
+    """Listen for up to `elapse` seconds for a chunk louder than `level`.
+
+    level: RMS threshold compared against audioop.rms() of each chunk.
+    elapse: maximum listening time in seconds.
+    Returns True as soon as one chunk exceeds the threshold, else False.
+    """
     ts_total = time.time()
-    ts_signal = None
 
-    while time.time() - ts_total < elapse:
-      try:
-        snd_data = self.stream_in.read(CHUNK_SIZE)
-        snd_rms = audioop.rms(snd_data, 2)
-        logging.info("snd.rms: " + str(snd_rms))
+    with self.stream_in as stream:
+      for content in stream.generator():
+        # Width 2 = bytes per sample of the 16-bit FORMAT.
+        snd_rms = audioop.rms(content, 2)
         if snd_rms > level:
-          sig_hear = True
-          break
-      
-      except IOError as ex:
-        if ex[1] != pyaudio.paInputOverflowed:
-          raise
-        buf = '\x00' * CHUNK_SIZE #white noise
-        logging.info("white noise")
-      except AttributeError:
-        pass
-
-
-    return sig_hear
-  
+          return True
+        if time.time() - ts_total >= elapse:
+          break
+
+    # Timed out, or the stream ended without a loud enough chunk.
+    return False
 
   def speech_recog(self, model):
 
@@ -211,27 +191,18 @@ def speech_recog(self, model):
     tstamp = time.time()
     recog_text = ''
 
-    while len(recog_text) < 1:
-      try:
-        buf = self.stream_in.read(CHUNK_SIZE)
-        logging.info("actual voice")
-        decoder.process_raw(buf, False, False)
-        if decoder.hyp().hypstr != '':
+    with self.stream_in as stream:
+      audio_generator = stream.generator()
+      for content in audio_generator:
+        decoder.process_raw(content, False, False)
+        if decoder.hyp() and decoder.hyp().hypstr != '':
           recog_text += decoder.hyp().hypstr
           print "text: " + decoder.hyp().hypstr
           tstamp = time.time()
-      except IOError as ex:
-        if ex[1] != pyaudio.paInputOverflowed:
-          raise
-        buf = '\x00' * CHUNK_SIZE #white noise
-        logging.info("white noise") 
-      except AttributeError:
-        pass
-
-    decoder.end_utt()
-
-    logging.info("recog text: " + recog_text)
-    return recog_text
+          if len(recog_text) > 1:
+            decoder.end_utt()
+            logging.info("recog text: " + recog_text)
+            return recog_text
 
   def speech_recog_google(self, locale):
     config = types.RecognitionConfig(
@@ -243,75 +214,40 @@ def speech_recog_google(self, locale):
         interim_results=False,
         single_utterance=True)
 
-    with self.MicrophoneStream(RATE, CHUNK) as stream:
+    t1 = time.time()
+    with self.stream_in as stream:
       audio_generator = stream.generator()
       requests = (types.StreamingRecognizeRequest(audio_content=content)
                   for content in audio_generator)
 
       responses = self._google_speech_client.streaming_recognize(streaming_config, requests)
 
       # Now, put the transcription responses to use.
-      #self.listen_print_loop(responses)
       for response in responses:
+        if time.time() - t1 > 10:
+          return ""
         if response.results:
           result = response.results[0]
           if result.is_final:
             return result.alternatives[0].transcript
 
-  def listen_print_loop(self, responses):
-    for response in responses:
-      if not response.results:
-        continue
-
-      # The `results` list is consecutive. For streaming, we only care about
-      # the first result being considered, since once it's `is_final`, it
-      # moves on to considering the next utterance.
-      result = response.results[0]
-      if not result.alternatives:
-        continue
-
-      # Display the transcription of the top alternative.
-      transcript = result.alternatives[0].transcript
-
-      # Display interim results, but with a carriage return at the end of the
-      # line, so subsequent lines will overwrite them.
-      #
-      # If the previous result was longer than this one, we need to print
-      # some extra spaces to overwrite the previous result
-      overwrite_chars = ' ' * (num_chars_printed - len(transcript))
-
-      if not result.is_final:
-        sys.stdout.write(transcript + overwrite_chars + '\r')
-        sys.stdout.flush()
-
-        num_chars_printed = len(transcript)
-
-      else:
-        print(transcript + overwrite_chars)
-
-        # Exit recognition if any of the transcribed phrases could be
-        # one of our keywords.
-        if re.search(r'\b(exit|quit)\b', transcript, re.I):
-          print('Exiting..')
-        break
-
-        num_chars_printed = 0
-
-
   class MicrophoneStream(object):
     """Opens a recording stream as a generator yielding the audio chunks."""
-    def __init__(self, rate, chunk):
+    def __init__(self, fmt, rate, chunk):
+      """Store the stream parameters; no audio device is opened yet.
+
+      fmt: PyAudio sample format, passed to open() in __enter__.
+      rate: sample rate, passed to open() in __enter__.
+      chunk: stored as self._chunk.
+      """
+      self._audio_interface = None
+      self._format = fmt
       self._rate = rate
       self._chunk = chunk
 
-      # Create a thread-safe buffer of audio data
-      self._buff = queue.Queue()
+      # The buffer queue is created in __enter__, so every `with` block
+      # starts with a fresh, empty queue
+      self._buff = None
       self.closed = True
 
     def __enter__(self):
       self._audio_interface = pyaudio.PyAudio()
+      self._buff = queue.Queue()
       self._audio_stream = self._audio_interface.open(
-        format=pyaudio.paInt16,
+        format=self._format,
         # The API currently only supports 1-channel (mono) audio
         # https://goo.gl/z757pE
         channels=1, rate=self._rate,
@@ -321,29 +257,20 @@ def __enter__(self):
         # overflow while the calling thread makes network requests, etc.
         stream_callback=self._fill_buffer,
       )
-
       self.closed = False
 
       return self
 
     def __exit__(self, type, value, traceback):
+      """Stop and close the stream, terminate PyAudio, then queue a None sentinel."""
       self._audio_stream.stop_stream()
       self._audio_stream.close()
-      self.closed = True
-      # Signal the generator to terminate so that the client's
-      # streaming_recognize method will not block the process termination.
-      self._buff.put(None)
       self._audio_interface.terminate()
-
-    def __exit__(self, type, value, traceback):
-      self._audio_stream.stop_stream()
-      self._audio_stream.close()
       self.closed = True
       # Signal the generator to terminate so that the client's
       # streaming_recognize method will not block the process termination.
       self._buff.put(None)
-      self._audio_interface.terminate()
 
+    def close(self):
+       """Do nothing; stopping and terminating the stream happens in __exit__."""
+       pass
+ 
     def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
       """Continuously collect data from the audio stream, into the buffer."""
       self._buff.put(in_data)
 
@@ -0,0 +1,63 @@
+# -*- coding:utf8 -*-
+# !/usr/bin/env python
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path
+import sys
+import random
+import json
+
+try:
+    import apiai
+except ImportError:
+    sys.path.append(
+        os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)
+    )
+    import apiai
+
+# Prefer a token supplied through the environment; the literal remains only
+# as the backward-compatible default.
+# NOTE(review): the default is a credential committed to source control --
+# consider rotating it and dropping the literal.
+CLIENT_ACCESS_TOKEN = os.environ.get('APIAI_CLIENT_ACCESS_TOKEN',
+                                     'a4c5990369cf4ce08b839abef0d2eac7')
+
+
+class Conversation:
+  """Shared client that sends text queries to API.AI."""
+
+  _instance = None
+
+  @classmethod
+  def get_instance(cls):
+    """Return the shared instance, creating it on first use."""
+    if cls._instance is None:
+      cls._instance = cls()
+    return cls._instance
+
+  def __init__(self):
+    self._ai = apiai.ApiAI(CLIENT_ACCESS_TOKEN)
+    # One random id per instance, so consecutive queries share a session.
+    self._session_id = str(int(random.random() * 1000000000000))
+
+  def get_action(self, query, locale):
+    """Send `query` to API.AI and return the relevant parts of the reply.
+
+    query: text to interpret.
+    locale: language code assigned to the request's `lang`.
+    Returns a dict with the keys "action", "parameters", "contexts" and
+    "response", all read from the "result" object of the JSON reply.
+    Raises KeyError if the reply lacks one of those fields.
+    """
+    request = self._ai.text_request()
+    request.lang = locale
+    # Attach the id generated in __init__; it was previously never used.
+    request.session_id = self._session_id
+    request.query = query
+
+    result = json.load(request.getresponse())["result"]
+    return {
+      "action": result["action"],
+      "parameters": result["parameters"],
+      "contexts": result["contexts"],
+      "response": result["fulfillment"]["speech"],
+    }